diff --git a/opencl/source/builtin_kernels_simulation/CMakeLists.txt b/opencl/source/builtin_kernels_simulation/CMakeLists.txt index 01b8d799c2..34b5b4a0aa 100644 --- a/opencl/source/builtin_kernels_simulation/CMakeLists.txt +++ b/opencl/source/builtin_kernels_simulation/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -8,9 +8,6 @@ set(BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") diff --git a/opencl/source/builtin_kernels_simulation/scheduler_simulation.h b/opencl/source/builtin_kernels_simulation/scheduler_simulation.h deleted file mode 100644 index 6c91e9f62a..0000000000 --- a/opencl/source/builtin_kernels_simulation/scheduler_simulation.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "opencl/source/builtin_kernels_simulation/opencl_c.h" - -#include -#include -namespace NEO { -class GraphicsAllocation; -} - -namespace BuiltinKernelsSimulation { - -extern bool conditionReady; -extern std::thread threads[]; - -template -class SchedulerSimulation { - public: - void runSchedulerSimulation(NEO::GraphicsAllocation *queue, - NEO::GraphicsAllocation *commandsStack, - NEO::GraphicsAllocation *eventsPool, - NEO::GraphicsAllocation *secondaryBatchBuffer, - NEO::GraphicsAllocation *dsh, - NEO::GraphicsAllocation *reflectionSurface, - NEO::GraphicsAllocation *queueStorageBuffer, - NEO::GraphicsAllocation *ssh, - NEO::GraphicsAllocation *debugQueue); - - void cleanSchedulerSimulation(); - - static void startScheduler(uint32_t index, - NEO::GraphicsAllocation *queue, - NEO::GraphicsAllocation *commandsStack, - NEO::GraphicsAllocation *eventsPool, - NEO::GraphicsAllocation *secondaryBatchBuffer, - NEO::GraphicsAllocation *dsh, - NEO::GraphicsAllocation *reflectionSurface, - NEO::GraphicsAllocation *queueStorageBuffer, - NEO::GraphicsAllocation *ssh, - NEO::GraphicsAllocation *debugQueue); - - void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue, - NEO::GraphicsAllocation *commandsStack, - NEO::GraphicsAllocation *eventsPool, - NEO::GraphicsAllocation *secondaryBatchBuffer, - NEO::GraphicsAllocation *dsh, - NEO::GraphicsAllocation *reflectionSurface, - NEO::GraphicsAllocation *queueStorageBuffer, - NEO::GraphicsAllocation *ssh, - NEO::GraphicsAllocation *debugQueue); - - static void patchGpGpuWalker(uint secondLevelBatchOffset, - __global uint *secondaryBatchBuffer, - uint interfaceDescriptorOffset, - uint simdSize, - uint totalLocalWorkSize, - uint3 dimSize, - uint3 startPoint, - uint numberOfHwThreadsPerWg, - uint indirectPayloadSize, - uint ioHoffset); - static bool enabled; - static bool simulationRun; -}; - -template -bool SchedulerSimulation::enabled = true; - -template -bool SchedulerSimulation::simulationRun = false; - -} // namespace BuiltinKernelsSimulation diff --git a/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl b/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl deleted file mode 100644 index 0acff8a130..0000000000 --- a/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/memory_manager/graphics_allocation.h" - -#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" - -#include -#include -#include - -using namespace NEO; - -namespace BuiltinKernelsSimulation { - -template -void SchedulerSimulation::cleanSchedulerSimulation() { - threadIDToLocalIDmap.clear(); - delete pGlobalBarrier; -} - -template -void SchedulerSimulation::initializeSchedulerSimulation(GraphicsAllocation *queue, - GraphicsAllocation *commandsStack, - GraphicsAllocation *eventsPool, - GraphicsAllocation *secondaryBatchBuffer, - GraphicsAllocation *dsh, - GraphicsAllocation *reflectionSurface, - GraphicsAllocation *queueStorageBuffer, - GraphicsAllocation *ssh, - GraphicsAllocation *debugQueue) { - - localSize[0] = NUM_OF_THREADS; - localSize[1] = 1; - localSize[2] = 1; - - threadIDToLocalIDmap.clear(); - pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS); - - // Spawn Thread ID == 0 on main thread - for (uint32_t i = 1; i < NUM_OF_THREADS; i++) { - threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue); - } - - conditionReady = true; -} - -template -void SchedulerSimulation::runSchedulerSimulation(GraphicsAllocation *queue, - GraphicsAllocation *commandsStack, - GraphicsAllocation *eventsPool, - GraphicsAllocation *secondaryBatchBuffer, - GraphicsAllocation *dsh, - GraphicsAllocation *reflectionSurface, - GraphicsAllocation *queueStorageBuffer, - GraphicsAllocation *ssh, - GraphicsAllocation *debugQueue) { - simulationRun = true; - if (enabled) { - initializeSchedulerSimulation(queue, - commandsStack, - eventsPool, - secondaryBatchBuffer, - dsh, - reflectionSurface, - queueStorageBuffer, - ssh, - debugQueue); - - // start main thread with LID == 0 - startScheduler(0, - queue, - commandsStack, - eventsPool, - secondaryBatchBuffer, - dsh, - reflectionSurface, - queueStorageBuffer, - ssh, - debugQueue); - - // Wait for all threads on main thread - if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) { - - for (uint32_t i = 1; i < NUM_OF_THREADS; i++) - threads[i].join(); - - cleanSchedulerSimulation(); - } - } -}; - -} // namespace BuiltinKernelsSimulation diff --git a/opencl/source/command_queue/CMakeLists.txt b/opencl/source/command_queue/CMakeLists.txt index a321f4a964..5e782f5c18 100644 --- a/opencl/source/command_queue/CMakeLists.txt +++ b/opencl/source/command_queue/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -13,8 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl - ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h @@ -43,8 +41,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl - ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 43d3eafc0b..3f802121d8 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,7 +15,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" @@ -502,8 +501,6 @@ class CommandQueueHw : public CommandQueue { bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; - void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo); - void runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel); static void computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, const size_t *bufferOrigin, @@ -513,10 +510,6 @@ class CommandQueueHw : public CommandQueue { size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch); - void processDeviceEnqueue(DeviceQueueHw *devQueueHw, - const MultiDispatchInfo &multiDispatchInfo, - TagNodeBase *hwTimeStamps, - bool &blocking); template void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, @@ -524,7 +517,6 @@ class CommandQueueHw : public CommandQueue { Event *event, TagNodeBase *&hwTimeStamps, bool blockQueue, - DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies); diff --git a/opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl b/opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl deleted file mode 100644 index 7a323db4b9..0000000000 --- a/opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/command_queue/command_queue_hw.h" - -namespace NEO { - -template -void CommandQueueHw::runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel) { -} -} // namespace NEO diff --git a/opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl b/opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl deleted file mode 100644 index c72f4d060c..0000000000 --- a/opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/command_queue/command_queue_hw.h" - -namespace NEO { - -template -void CommandQueueHw::runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel) { - BuiltinKernelsSimulation::SchedulerSimulation simulation; - simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(), - devQueueHw.getStackBuffer(), - devQueueHw.getEventPoolBuffer(), - devQueueHw.getSlbBuffer(), - devQueueHw.getDshBuffer(), - parentKernel.getKernelReflectionSurface(), - devQueueHw.getQueueStorageBuffer(), - this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(), - devQueueHw.getDebugQueue()); -} -} // namespace NEO diff --git a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl index 6fd496a361..d4f5eb26c3 100644 --- a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl +++ b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,6 @@ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue_hw_base.inl" -#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl" #include "opencl/source/memory_manager/resource_surface.h" namespace NEO { diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 4e8ada27c3..399f65a2e6 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" -#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" @@ -67,45 +66,40 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount auto auxTranslationMode = AuxTranslationMode::None; - if (DebugManager.flags.ForceDispatchScheduler.get()) { - forceDispatchScheduler(multiDispatchInfo); + kernel->updateAuxTranslationRequired(); + if (kernel->isAuxTranslationRequired()) { + kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); + multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); + + if (!kernelObjsForAuxTranslation.empty()) { + auxTranslationMode = HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()); + } + } + + if (AuxTranslationMode::Builtin == auxTranslationMode) { + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); + builtInLock.takeOwnership(builder, this->context); + + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); + } + + if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { + DispatchInfoBuilder builder(getClDevice()); + builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); + builder.setKernel(kernel); + builder.bake(multiDispatchInfo); } else { + auto builder = kernel->getKernelInfo().builtinDispatchBuilder; + builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); - kernel->updateAuxTranslationRequired(); - if (kernel->isAuxTranslationRequired()) { - kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); - multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); - - if (!kernelObjsForAuxTranslation.empty()) { - auxTranslationMode = HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()); - } + if (multiDispatchInfo.size() == 0) { + return; } + } - if (AuxTranslationMode::Builtin == auxTranslationMode) { - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); - builtInLock.takeOwnership(builder, this->context); - - dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); - } - - if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { - DispatchInfoBuilder builder(getClDevice()); - builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); - builder.setKernel(kernel); - builder.bake(multiDispatchInfo); - } else { - auto builder = kernel->getKernelInfo().builtinDispatchBuilder; - builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); - - if (multiDispatchInfo.size() == 0) { - return; - } - } - - if (AuxTranslationMode::Builtin == auxTranslationMode) { - UNRECOVERABLE_IF(kernel->isParentKernel); - dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); - } + if (AuxTranslationMode::Builtin == auxTranslationMode) { + UNRECOVERABLE_IF(kernel->isParentKernel); + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } if (AuxTranslationMode::Blit == auxTranslationMode) { @@ -115,35 +109,6 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount enqueueHandler(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); } -template -void CommandQueueHw::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) { - SchedulerKernel &scheduler = getContext().getSchedulerKernel(); - - auto devQueue = this->getContext().getDefaultDeviceQueue(); - DeviceQueueHw *devQueueHw = castToObjectOrAbort>(devQueue); - - DispatchInfo dispatchInfo(devQueue->getClDevice(), &scheduler, 1, Vec3(scheduler.getGws(), 1, 1), Vec3(scheduler.getLws(), 1, 1), Vec3(0, 0, 0)); - Vec3 workGroupCount = generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize()); - dispatchInfo.setTotalNumberOfWorkgroups(workGroupCount); - dispatchInfo.setNumberOfWorkgroups(workGroupCount); - - scheduler.createReflectionSurface(); - GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface(); - - devQueueHw->resetDeviceQueue(); - - scheduler.setArgs(devQueueHw->getQueueBuffer(), - devQueueHw->getStackBuffer(), - devQueueHw->getEventPoolBuffer(), - devQueueHw->getSlbBuffer(), - devQueueHw->getDshBuffer(), - reflectionSurface, - devQueueHw->getQueueStorageBuffer(), - this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation()); - - multiDispatchInfo.push(dispatchInfo); -} - template template void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, @@ -163,8 +128,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); - auto devQueue = this->getContext().getDefaultDeviceQueue(); - DeviceQueueHw *devQueueHw = castToObject>(devQueue); TagNodeBase *hwTimeStamps = nullptr; CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); @@ -183,11 +146,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType); - if (parentKernel && !blockQueue) { - while (!devQueueHw->isEMCriticalSectionFree()) - ; - } - enqueueHandlerHook(commandType, multiDispatchInfo); bool clearDependenciesForSubCapture = false; @@ -262,7 +220,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), - hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(), + hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(), timestampPacketDependencies); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); @@ -310,9 +268,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } } if (!blockQueue) { - if (parentKernel) { - processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking); - } if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) { csrDeps.makeResident(computeCommandStreamReceiver); @@ -332,15 +287,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, taskLevel, printfHandler.get(), getBcsForAuxTranslation()); - - if (parentKernel) { - computeCommandStreamReceiver.setMediaVFEStateDirty(true); - - if (devQueueHw->getSchedulerReturnInstance() > 0) { - waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); - this->runSchedulerSimulation(*devQueueHw, *parentKernel); - } - } } else if (enqueueProperties.isFlushWithoutKernelRequired()) { completionStamp = enqueueCommandWithoutKernel( surfacesForResidency, @@ -438,7 +384,6 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf Event *event, TagNodeBase *&hwTimeStamps, bool blockQueue, - DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies) { @@ -474,10 +419,6 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue()); parentKernel->patchEventPool(context->getDefaultDeviceQueue()); parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get()); - if (!blockQueue) { - devQueueHw->resetDeviceQueue(); - devQueueHw->acquireEMCriticalSection(); - } } if (event && this->isPerfCountersEnabled()) { @@ -651,56 +592,6 @@ void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(Comm EncodeStoreMMIO::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress); } -template -void CommandQueueHw::processDeviceEnqueue(DeviceQueueHw *devQueueHw, - const MultiDispatchInfo &multiDispatchInfo, - TagNodeBase *hwTimeStamps, - bool &blocking) { - auto parentKernel = multiDispatchInfo.peekParentKernel(); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType()); - - uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1; - devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM), - *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), - parentKernel, - (uint32_t)multiDispatchInfo.size(), - getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(), - taskCount, - hwTimeStamps, - isCcsUsed); - - SchedulerKernel &scheduler = getContext().getSchedulerKernel(); - - scheduler.setArgs(devQueueHw->getQueueBuffer(), - devQueueHw->getStackBuffer(), - devQueueHw->getEventPoolBuffer(), - devQueueHw->getSlbBuffer(), - devQueueHw->getDshBuffer(), - parentKernel->getKernelReflectionSurface(), - devQueueHw->getQueueStorageBuffer(), - this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(), - devQueueHw->getDebugQueue()); - - auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); - GpgpuWalkerHelper::dispatchScheduler( - *this->commandStream, - *devQueueHw, - preemptionMode, - scheduler, - &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), - devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), - isCcsUsed); - - scheduler.makeResident(getGpgpuCommandStreamReceiver()); - - parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver()); - - if (parentKernel->isAuxTranslationRequired()) { - blocking = true; - } -} - template void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) { auto isQueueBlockedStatus = isQueueBlocked(); @@ -842,17 +733,8 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( IndirectHeap *dsh = nullptr; IndirectHeap *ioh = nullptr; - if (multiDispatchInfo.peekParentKernel()) { - DeviceQueueHw *pDevQueue = castToObject>(this->getContext().getDefaultDeviceQueue()); - DEBUG_BREAK_IF(pDevQueue == nullptr); - dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - // In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks. - ioh = dsh; - implicitFlush = true; - } else { - dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); - ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); - } + dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); + ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto allocNeedsFlushDC = false; if (!device->isFullRangeSvm()) { diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index e46c2633fe..2ed3207c9d 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,7 +20,6 @@ #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" @@ -84,15 +83,6 @@ class GpgpuWalkerHelper { TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); - static void dispatchScheduler( - LinearStream &commandStream, - DeviceQueueHw &devQueueHw, - PreemptionMode preemptionMode, - SchedulerKernel &scheduler, - IndirectHeap *ssh, - IndirectHeap *dsh, - bool isCcsUsed); - static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd); private: @@ -142,11 +132,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) { if (heapType == IndirectHeap::SURFACE_STATE) { expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT) - { - DeviceQueueHw *pDevQueue = castToObject>(commandQueue.getContext().getDefaultDeviceQueue()); - DEBUG_BREAK_IF(pDevQueue == nullptr); - ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); } } diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 1b78ca5bc0..3d7680b549 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -191,17 +191,12 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c return expectedSizeCS; } - Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); for (auto &dispatchInfo : multiDispatchInfo) { expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo); size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0; expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); } - if (parentKernel) { - SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel(); - expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{}); - } if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps); expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); diff --git a/opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl b/opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl deleted file mode 100644 index 71dd61714b..0000000000 --- a/opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/command_queue/gpgpu_walker.h" - -namespace NEO { -template -void GpgpuWalkerHelper::dispatchScheduler( - LinearStream &commandStream, - DeviceQueueHw &devQueueHw, - PreemptionMode preemptionMode, - SchedulerKernel &scheduler, - IndirectHeap *ssh, - IndirectHeap *dsh, - bool isCcsUsed) { - UNRECOVERABLE_IF(true); -} -} // namespace NEO \ No newline at end of file diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl index 384b5cafc5..4d4aaaa753 100644 --- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -16,7 +16,6 @@ #include "shared/source/helpers/simd_helper.h" #include "opencl/source/command_queue/gpgpu_walker_base.inl" -#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl" #include "opencl/source/platform/platform.h" namespace NEO { diff --git a/opencl/source/device_queue/CMakeLists.txt b/opencl/source/device_queue/CMakeLists.txt index 183fa5b01d..a53e591413 100644 --- a/opencl/source/device_queue/CMakeLists.txt +++ b/opencl/source/device_queue/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -8,10 +8,6 @@ set(RUNTIME_SRCS_DEVICE_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE}) diff --git a/opencl/source/device_queue/device_queue.cpp b/opencl/source/device_queue/device_queue.cpp index 017d798696..ca2b63615d 100644 --- a/opencl/source/device_queue/device_queue.cpp +++ b/opencl/source/device_queue/device_queue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,7 +12,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/queue_helpers.h" @@ -186,10 +185,6 @@ void DeviceQueue::resetDeviceQueue() { return; } -void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) { - return; -} - IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) { return nullptr; } diff --git a/opencl/source/device_queue/device_queue.h b/opencl/source/device_queue/device_queue.h index 91cb15dac3..191a349340 100644 --- a/opencl/source/device_queue/device_queue.h +++ b/opencl/source/device_queue/device_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -83,7 +83,6 @@ class DeviceQueue : public BaseObject<_device_queue> { } virtual void resetDeviceQueue(); - virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed); virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type); void acquireEMCriticalSection() { diff --git a/opencl/source/device_queue/device_queue_hw.h b/opencl/source/device_queue/device_queue_hw.h deleted file mode 100644 index 46703112d8..0000000000 --- a/opencl/source/device_queue/device_queue_hw.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "shared/source/command_stream/linear_stream.h" -#include "shared/source/helpers/ptr_math.h" -#include "shared/source/indirect_heap/indirect_heap.h" - -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/source/kernel/kernel.h" -#include "opencl/source/program/program.h" -#include "opencl/source/scheduler/scheduler_kernel.h" - -#include "hw_cmds.h" - -namespace NEO { - -template -class DeviceQueueHw : public DeviceQueue { - using BaseClass = DeviceQueue; - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; - using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; - using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; - using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; - using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; - using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; - using MI_MATH = typename GfxFamily::MI_MATH; - using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; - - public: - DeviceQueueHw(Context *context, - ClDevice *device, - cl_queue_properties &properties) : BaseClass(context, device, properties) { - allocateSlbBuffer(); - offsetDsh = colorCalcStateSize + (uint32_t)sizeof(INTERFACE_DESCRIPTOR_DATA) * interfaceDescriptorEntries * numberOfIDTables; - igilQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - } - - static DeviceQueue *create(Context *context, - ClDevice *device, - cl_queue_properties &properties) { - return new (std::nothrow) DeviceQueueHw(context, device, properties); - } - - IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override; - - LinearStream *getSlbCS() { return &slbCS; } - void resetDSH(); - - size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler); - - void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override; - - void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override; - void resetDeviceQueue() override; - void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override; - - uint32_t getSchedulerReturnInstance() { - return igilQueue->m_controls.m_SchedulerEarlyReturn; - } - - static size_t getCSPrefetchSize(); - - protected: - void allocateSlbBuffer(); - size_t getMinimumSlbSize(); - size_t getWaCommandsSize(); - void addArbCheckCmdWa(); - void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder); - void addLriCmdWa(bool setArbCheck); - void addLriCmd(bool setArbCheck); - void addPipeControlCmdWa(bool isNoopCmd = false); - void initPipeControl(PIPE_CONTROL *pc); - void buildSlbDummyCommands(); - void addDcFlushToPipeControlWa(PIPE_CONTROL *pc); - - void addProfilingEndCmds(uint64_t timestampAddress); - static size_t getProfilingEndCmdsSize(); - - MOCKABLE_VIRTUAL void addMediaStateClearCmds(); - static size_t getMediaStateClearCmdsSize(); - - static size_t getExecutionModelCleanupSectionSize(); - static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed); - - LinearStream slbCS; - IGIL_CommandQueue *igilQueue = nullptr; -}; -} // namespace NEO diff --git a/opencl/source/device_queue/device_queue_hw_profiling.inl b/opencl/source/device_queue/device_queue_hw_profiling.inl deleted file mode 100644 index e6d4f8fb2b..0000000000 --- a/opencl/source/device_queue/device_queue_hw_profiling.inl +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/device_queue/device_queue_hw.h" - -namespace NEO { - -template -void DeviceQueueHw::addProfilingEndCmds(uint64_t timestampAddress) { - - auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); - auto pipeControlCmd = GfxFamily::cmdInitPipeControl; - pipeControlCmd.setCommandStreamerStallEnable(true); - *pipeControlSpace = pipeControlCmd; - - //low part - auto mICmdLowSpace = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM)); - auto mICmdLow = GfxFamily::cmdInitStoreRegisterMem; - GpgpuWalkerHelper::adjustMiStoreRegMemMode(&mICmdLow); - mICmdLow.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); - mICmdLow.setMemoryAddress(timestampAddress); - *mICmdLowSpace = mICmdLow; -} -} // namespace NEO diff --git a/opencl/source/gen11/command_queue_gen11.cpp b/opencl/source/gen11/command_queue_gen11.cpp index 43849821c8..9758cd0b31 100644 --- a/opencl/source/gen11/command_queue_gen11.cpp +++ b/opencl/source/gen11/command_queue_gen11.cpp @@ -10,7 +10,6 @@ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" -#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { diff --git a/opencl/source/gen11/enable_family_full_ocl_gen11.cpp b/opencl/source/gen11/enable_family_full_ocl_gen11.cpp index c5c00a2e77..a343b80c6a 100644 --- a/opencl/source/gen11/enable_family_full_ocl_gen11.cpp +++ b/opencl/source/gen11/enable_family_full_ocl_gen11.cpp @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/source/gen11/gpgpu_walker_gen11.cpp b/opencl/source/gen11/gpgpu_walker_gen11.cpp index a4f5a3a062..2f90f3f4d4 100644 --- a/opencl/source/gen11/gpgpu_walker_gen11.cpp +++ b/opencl/source/gen11/gpgpu_walker_gen11.cpp @@ -8,7 +8,6 @@ #include "shared/source/gen11/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" -#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { diff --git a/opencl/source/gen12lp/command_queue_gen12lp.cpp b/opencl/source/gen12lp/command_queue_gen12lp.cpp index 8b17126229..57dfe7fb59 100644 --- a/opencl/source/gen12lp/command_queue_gen12lp.cpp +++ b/opencl/source/gen12lp/command_queue_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,6 @@ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" -#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl" #include "command_queue_helpers_gen12lp.inl" diff --git a/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp b/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp index c2ba98d863..78db9d62c8 100644 --- a/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp +++ b/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp index d68f96ae1c..343a95da29 100644 --- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp +++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,6 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" -#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { diff --git a/opencl/source/gen8/command_queue_gen8.cpp b/opencl/source/gen8/command_queue_gen8.cpp index a82c6befc2..20ef5f7de8 100644 --- a/opencl/source/gen8/command_queue_gen8.cpp +++ b/opencl/source/gen8/command_queue_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,7 +10,6 @@ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" -#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { diff --git a/opencl/source/gen8/gpgpu_walker_gen8.cpp b/opencl/source/gen8/gpgpu_walker_gen8.cpp index 4afd33bb87..8e71b0738a 100644 --- a/opencl/source/gen8/gpgpu_walker_gen8.cpp +++ b/opencl/source/gen8/gpgpu_walker_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,6 @@ #include "shared/source/gen8/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" -#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { diff --git a/opencl/source/gen9/command_queue_gen9.cpp b/opencl/source/gen9/command_queue_gen9.cpp index 09259a6989..24e9149000 100644 --- a/opencl/source/gen9/command_queue_gen9.cpp +++ b/opencl/source/gen9/command_queue_gen9.cpp @@ -10,7 +10,6 @@ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" -#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { diff --git a/opencl/source/gen9/enable_family_full_ocl_gen9.cpp b/opencl/source/gen9/enable_family_full_ocl_gen9.cpp index e419b0baa1..0aceecefe7 100644 --- a/opencl/source/gen9/enable_family_full_ocl_gen9.cpp +++ b/opencl/source/gen9/enable_family_full_ocl_gen9.cpp @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/source/gen9/gpgpu_walker_gen9.cpp b/opencl/source/gen9/gpgpu_walker_gen9.cpp index 5c82e7306e..8ee04f5645 100644 --- a/opencl/source/gen9/gpgpu_walker_gen9.cpp +++ b/opencl/source/gen9/gpgpu_walker_gen9.cpp @@ -8,7 +8,6 @@ #include "shared/source/gen9/hw_cmds_base.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" -#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 69fd2b27f1..9b8b4f0707 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -145,7 +145,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); - bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); if (executionModelKernel) { while (!devQueue->isEMCriticalSectionFree()) @@ -175,39 +174,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate } makeTimestampPacketsResident(commandStreamReceiver); - if (executionModelKernel) { - uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1; - devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount, - commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed); - - SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel(); - - scheduler.setArgs(devQueue->getQueueBuffer(), - devQueue->getStackBuffer(), - devQueue->getEventPoolBuffer(), - devQueue->getSlbBuffer(), - dsh->getGraphicsAllocation(), - kernel->getKernelReflectionSurface(), - devQueue->getQueueStorageBuffer(), - ssh->getGraphicsAllocation(), - devQueue->getDebugQueue()); - - devQueue->dispatchScheduler( - *kernelOperation->commandStream, - scheduler, - preemptionMode, - ssh, - dsh, - isCcsUsed); - - scheduler.makeResident(commandStreamReceiver); - - // Update SLM usage - slmUsed |= scheduler.getSlmTotalSize() > 0; - - this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver); - } - if (kernelOperation->blitPropertiesContainer.size() > 0) { CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All); diff --git a/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp b/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp index 146ec8e95a..14f89479bc 100644 --- a/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp +++ b/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp b/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp index 72bcb6c87f..78e9dff40d 100644 --- a/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp +++ b/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp b/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp index 595bc45d71..52460b6d58 100644 --- a/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp +++ b/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,6 @@ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" diff --git a/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl b/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl index cbcfa22404..af0bbddb10 100644 --- a/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl +++ b/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,7 +10,6 @@ #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" #include "cl_api_tests.h" @@ -51,88 +50,4 @@ struct clSetDefaultDeviceCommandQueueApiTest : public api_tests { cl_command_queue deviceQueue = nullptr; }; -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenSettingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) { - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, deviceQueue); - EXPECT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(static_cast<_device_queue *>(deviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue())); -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenReplacingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, - 0, - 0}; - auto pDevice = castToObject(testedClDevice); - - if (pDevice->getDeviceInfo().maxOnDeviceQueues > 1) { - auto newDeviceQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal); - ASSERT_NE(nullptr, newDeviceQueue); - ASSERT_EQ(CL_SUCCESS, retVal); - - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, newDeviceQueue); - EXPECT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(static_cast<_device_queue *>(newDeviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue())); - - clReleaseCommandQueue(newDeviceQueue); - } -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullContextWhenSettingDefaultDeviceQueueThenClInvalidContextErrorIsReturned, DeviceEnqueueSupport) { - retVal = clSetDefaultDeviceCommandQueue(nullptr, testedClDevice, deviceQueue); - ASSERT_EQ(CL_INVALID_CONTEXT, retVal); -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceWhenSettingDefaultDeviceQueueThenClInvalidDeviceErrorIsReturned, DeviceEnqueueSupport) { - retVal = clSetDefaultDeviceCommandQueue(pContext, nullptr, deviceQueue); - ASSERT_EQ(CL_INVALID_DEVICE, retVal); -} - -TEST_F(clSetDefaultDeviceCommandQueueApiTest, GivenDeviceNotSupportingDeviceEnqueueWhenSettingDefaultDeviceQueueThenClInvalidOperationErrorIsReturned) { - DebugManagerStateRestore dbgRestorer; - DebugManager.flags.ForceDeviceEnqueueSupport.set(0); - - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, nullptr); - ASSERT_EQ(CL_INVALID_OPERATION, retVal); -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) { - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, nullptr); - ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenHostQueueAsDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) { - cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, 0, 0, 0}; - cl_command_queue hostQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal); - ASSERT_NE(nullptr, hostQueue); - ASSERT_EQ(CL_SUCCESS, retVal); - - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, hostQueue); - ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); - - retVal = clReleaseCommandQueue(hostQueue); - EXPECT_EQ(CL_SUCCESS, retVal); -} - -HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenIncorrectDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) { - auto context2 = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, - 0, - 0}; - cl_command_queue deviceQueueCtx2 = clCreateCommandQueueWithProperties(context2, testedClDevice, properties, &retVal); - ASSERT_NE(nullptr, deviceQueueCtx2); - ASSERT_EQ(CL_SUCCESS, retVal); - - retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, deviceQueueCtx2); - ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); - - retVal = clReleaseCommandQueue(deviceQueueCtx2); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseContext(context2); - EXPECT_EQ(CL_SUCCESS, retVal); -} } // namespace ULT diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index badb134730..b570b6ebb6 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -16,13 +16,11 @@ #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "reg_configs_common.h" @@ -997,30 +995,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabled EXPECT_TRUE(pipeControl->getDcFlushEnable()); } -HWTEST2_F(EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenMakeEnqueueBlocking, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice); - - MyCmdQ cmdQ(context, pClDevice); - size_t gws[3] = {1, 0, 0}; - - cl_queue_properties queueProperties = {}; - auto mockDevQueue = std::make_unique>(context, pClDevice, queueProperties); - context->setDefaultDeviceQueue(mockDevQueue.get()); - - MockParentKernel::CreateParams createParams{}; - std::unique_ptr parentKernel(MockParentKernel::create(*context, createParams)); - parentKernel->initialize(); - - parentKernel->auxTranslationRequired = false; - cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(0u, cmdQ.waitCalled); - mockDevQueue->getIgilQueue()->m_controls.m_CriticalSection = 0; - - parentKernel->auxTranslationRequired = true; - cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, cmdQ.waitCalled); -} - HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelButNoDeviceQueueWhenEnqueueIsCalledThenItReturnsInvalidOperation) { REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice); diff --git a/opencl/test/unit_test/device_queue/CMakeLists.txt b/opencl/test/unit_test/device_queue/CMakeLists.txt deleted file mode 100644 index 929510dde5..0000000000 --- a/opencl/test/unit_test/device_queue/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# -# Copyright (C) 2018-2021 Intel Corporation -# -# SPDX-License-Identifier: MIT -# - -set(IGDRCL_SRCS_tests_device_queue - ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/get_device_queue_info_tests.cpp -) -target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device_queue}) diff --git a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp deleted file mode 100644 index 519182bd45..0000000000 --- a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp +++ /dev/null @@ -1,793 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/utilities/tag_allocator.h" -#include "shared/test/common/cmd_parse/hw_parse.h" -#include "shared/test/common/helpers/debug_manager_state_restore.h" -#include "shared/test/common/helpers/unit_test_helper.h" -#include "shared/test/common/mocks/mock_device.h" - -#include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/helpers/hardware_commands_helper.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" - -#include "hw_cmds.h" - -#include - -using namespace NEO; -using namespace DeviceHostQueue; - -HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenQueueMatchesUnderlyingBuffer, DeviceEnqueueSupport) { - // profiling disabled - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - auto deviceQueueHw = castToHwType(deviceQueue); - - auto expected = getExpectedgilCmdQueueAfterReset(deviceQueue); - deviceQueueHw->resetDeviceQueue(); - - EXPECT_EQ(0, memcmp(deviceQueueHw->getQueueBuffer()->getUnderlyingBuffer(), - &expected, sizeof(IGIL_CommandQueue))); - - delete deviceQueue; - - //profiling enabled - deviceQueue = createQueueObject(deviceQueueProperties::minimumPropertiesWithProfiling); - ASSERT_NE(deviceQueue, nullptr); - deviceQueueHw = castToHwType(deviceQueue); - - expected = getExpectedgilCmdQueueAfterReset(deviceQueue); - deviceQueueHw->resetDeviceQueue(); - - EXPECT_EQ(1u, expected.m_controls.m_IsProfilingEnabled); - - EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(), - &expected, sizeof(IGIL_CommandQueue))); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenFirstStackElementAtValueOneDeviceEnqueueSupport, DeviceEnqueueSupport) { - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - auto deviceQueueHw = castToHwType(deviceQueue); - - deviceQueueHw->resetDeviceQueue(); - - auto stack = static_cast(deviceQueue->getStackBuffer()->getUnderlyingBuffer()); - stack += ((deviceQueue->getStackBuffer()->getUnderlyingBufferSize() / sizeof(uint32_t)) - 1); - EXPECT_EQ(*stack, 1u); // first stack element in surface at value "1" - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcquiringEmCrticalSectionThenSectionIsNotAcquired, DeviceEnqueueSupport) { - DebugManagerStateRestore dbgRestorer; - - DebugManager.flags.EnableNullHardware.set(1); - - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - auto deviceQueueHw = castToHwType(deviceQueue); - - deviceQueueHw->acquireEMCriticalSection(); - - EXPECT_TRUE(deviceQueueHw->isEMCriticalSectionFree()); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeIsGreaterThanZero, DeviceEnqueueSupport) { - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, - deviceQueueProperties::minimumProperties[0]); - - EXPECT_NE(0u, mockDeviceQueueHw->getCSPrefetchSize()); - delete mockDeviceQueueHw; -} - -HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) { - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, - deviceQueueProperties::minimumProperties[0]); - - mockDeviceQueueHw->addLriCmd(true); - - HardwareParse hwParser; - auto *slbCS = mockDeviceQueueHw->getSlbCS(); - - hwParser.parseCommands(*slbCS, 0); - auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - - ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); - - MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; - - EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); - EXPECT_EQ(0x100u, loadRegImm->getDataDword()); - - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed()); - delete mockDeviceQueueHw; -} - -HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) { - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, - deviceQueueProperties::minimumProperties[0]); - - mockDeviceQueueHw->addLriCmd(false); - - HardwareParse hwParser; - auto *slbCS = mockDeviceQueueHw->getSlbCS(); - - hwParser.parseCommands(*slbCS, 0); - auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - - ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); - - MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; - - EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); - EXPECT_EQ(0u, loadRegImm->getDataDword()); - - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed()); - delete mockDeviceQueueHw; -} - -HWTEST2_F(DeviceQueueHwTest, GivenDeviceQueueHWWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) { - auto timestampResolution = static_cast(device->getProfilingTimerResolution()); - - auto deviceQueue = std::unique_ptr(createQueueObject()); - ASSERT_NE(deviceQueue, nullptr); - - auto eventPoolBuffer = reinterpret_cast(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer()); - - EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution); -} - -class DeviceQueueSlb : public DeviceQueueHwTest { - public: - template - void *compareCmds(void *position, Cmd &cmd) { - EXPECT_EQ(0, memcmp(position, &cmd, sizeof(Cmd))); - return ptrOffset(position, sizeof(Cmd)); - } - void *compareCmdsWithSize(void *position, void *cmd, size_t size) { - EXPECT_EQ(0, memcmp(position, cmd, size)); - return ptrOffset(position, size); - } -}; - -HWTEST2_F(DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSizeIsAllocated, DeviceEnqueueSupport) { - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0])); - - LinearStream *slbCS = mockDeviceQueueHw->getSlbCS(); - size_t expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128; - expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); - expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); - - expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); - expectedSize += (4 * MemoryConstants::pageSize); - - EXPECT_LE(expectedSize, slbCS->getAvailableSpace()); -} - -HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAreCorrect, DeviceEnqueueSupport) { - auto mockDeviceQueueHw = - new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); - auto mockDeviceQueueHwWithProfiling = - new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumPropertiesWithProfiling[0]); - - LinearStream *slbCS = mockDeviceQueueHw->getSlbCS(); - auto expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128; - expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); - - mockDeviceQueueHw->resetDeviceQueue(); - mockDeviceQueueHwWithProfiling->resetDeviceQueue(); - EXPECT_EQ(slbCS->getUsed(), expectedSize); - EXPECT_EQ(mockDeviceQueueHwWithProfiling->getSlbCS()->getUsed(), expectedSize); - - auto cmds = mockDeviceQueueHw->expectedCmds; - auto cmdsWithProfiling = mockDeviceQueueHwWithProfiling->expectedCmds; - - void *currCmd = slbCS->getCpuBase(); - void *currCmdWithProfiling = mockDeviceQueueHwWithProfiling->getSlbCS()->getCpuBase(); - for (size_t i = 0; i < 128; i++) { - currCmd = compareCmds(currCmd, cmds.mediaStateFlush); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush); - - if (mockDeviceQueueHw->arbCheckWa) { - currCmd = compareCmds(currCmd, cmds.arbCheck); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck); - } - if (mockDeviceQueueHw->miAtomicWa) { - currCmd = compareCmds(currCmd, cmds.miAtomic); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.miAtomic); - } - - currCmd = compareCmds(currCmd, cmds.mediaIdLoad); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaIdLoad); - - if (mockDeviceQueueHw->lriWa) { - currCmd = compareCmds(currCmd, cmds.lriTrue); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriTrue); - } - - currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); - - if (mockDeviceQueueHw->pipeControlWa) { - currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); - } - - currCmd = compareCmds(currCmd, cmds.gpgpuWalker); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.gpgpuWalker); - - currCmd = compareCmds(currCmd, cmds.mediaStateFlush); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush); - - if (mockDeviceQueueHw->arbCheckWa) { - currCmd = compareCmds(currCmd, cmds.arbCheck); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck); - } - - currCmd = compareCmds(currCmd, cmds.pipeControl); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); - - if (mockDeviceQueueHw->pipeControlWa) { - currCmd = compareCmds(currCmd, cmds.pipeControl); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); - } - - if (mockDeviceQueueHw->lriWa) { - currCmd = compareCmds(currCmd, cmds.lriFalse); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriFalse); - } - - currCmd = compareCmdsWithSize(currCmd, cmds.prefetch, DeviceQueueHw::getCSPrefetchSize()); - currCmdWithProfiling = compareCmdsWithSize(currCmdWithProfiling, cmdsWithProfiling.prefetch, DeviceQueueHw::getCSPrefetchSize()); - } - - currCmd = compareCmds(currCmd, cmds.bbStart); - currCmdWithProfiling = compareCmds(currCmdWithProfiling, mockDeviceQueueHwWithProfiling->expectedCmds.bbStart); - - delete mockDeviceQueueHw; - delete mockDeviceQueueHwWithProfiling; -} - -HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect, DeviceEnqueueSupport) { - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, - deviceQueueProperties::minimumProperties[0]); - - auto slb = mockDeviceQueueHw->getSlbBuffer(); - auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); - auto slbCopy = malloc(slb->getUnderlyingBufferSize()); - memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize()); - memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); - - auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - - // slbEndOffset < commandsSize * 128 - // always fill only 1 enqueue (after offset) - auto offset = static_cast(commandsSize) * 50; - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset; - mockDeviceQueueHw->resetDeviceQueue(); - EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset - EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset), - slbCopy, commandsSize)); // change 1 enqueue - EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize), - slbCopy, offset)); // dont touch memory after (offset + 1 enqueue) - compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128), - mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always on the same place - - // slbEndOffset == commandsSize * 128 - // dont fill commands - memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize()); - offset = static_cast(commandsSize) * 128; - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast(commandsSize); - mockDeviceQueueHw->resetDeviceQueue(); - EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues - compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128), - mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always in the same place - - delete mockDeviceQueueHw; - free(slbCopy); -} - -HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsCorrect, DeviceEnqueueSupport) { - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); - auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); - auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); - uint32_t taskCount = 7; - - mockDeviceQueueHw->buildSlbDummyCommands(); - uint64_t tagAddress = 0x123450000; - mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, nullptr, tagAddress, taskCount); - - HardwareParse hwParser; - auto *slbCS = mockDeviceQueueHw->getSlbCS(); - size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize); - size_t cleanupSectionOffsetToParse = cleanupSectionOffset; - - size_t slbUsed = slbCS->getUsed(); - slbUsed = alignUp(slbUsed, MemoryConstants::pageSize); - size_t slbMax = slbCS->getMaxAvailableSpace(); - - // 4 pages padding expected after cleanup section - EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed); - - if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { - cleanupSectionOffsetToParse += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2; - } - - hwParser.parseCommands(*slbCS, cleanupSectionOffsetToParse); - hwParser.findHardwareCommands(); - - uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset; - EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress); - EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize); - - auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); - - bool tagWriteFound = false; - while (auto pipeControlCmd = genCmdCast(*(++pipeControlItor))) { - if (pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - if (tagAddress == NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd)) { - tagWriteFound = true; - break; - } - } - } - - EXPECT_TRUE(tagWriteFound); - - auto bbEndItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - EXPECT_NE(hwParser.cmdList.end(), bbEndItor); - MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor; - uint64_t bbEndAddres = (uint64_t)bbEnd; - - EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres); - - delete mockParentKernel; - delete mockDeviceQueueHw; -} - -HWTEST2_F(DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmCleanupSectionIsAdded, DeviceEnqueueSupport) { - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - - auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); - auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); - auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); - uint32_t taskCount = 7; - - auto hwTimeStamp = pCommandQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); - mockDeviceQueueHw->buildSlbDummyCommands(); - mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, 0x123, taskCount); - - uint64_t eventTimestampAddr = igilCmdQueue->m_controls.m_EventTimestampAddress; - uint64_t contextCompleteAddr = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS); - EXPECT_EQ(contextCompleteAddr, eventTimestampAddr); - - HardwareParse hwParser; - auto *slbCS = mockDeviceQueueHw->getSlbCS(); - size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize); - size_t cleanupSectionOffsetToParse = cleanupSectionOffset; - - hwParser.parseCommands(*slbCS, cleanupSectionOffsetToParse); - hwParser.findHardwareCommands(); - - uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset; - EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress); - EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize); - - auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - - if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { - auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor); - - pipeControlItor = find(loadRegImmItor, hwParser.cmdList.end()); - pipeControlItor++; - } - - EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); - - PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor; - EXPECT_NE(0u, pipeControl->getCommandStreamerStallEnable()); - - auto loadRegImmItor = find(pipeControlItor, hwParser.cmdList.end()); - - ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); - - MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; - - EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); - EXPECT_EQ(0u, loadRegImm->getDataDword()); - - pipeControlItor++; - EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); - - auto bbEndItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - EXPECT_NE(hwParser.cmdList.end(), bbEndItor); - MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor; - uint64_t bbEndAddres = (uint64_t)bbEnd; - - EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres); - - delete mockParentKernel; - delete mockDeviceQueueHw; -} - -HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBufferParamsAreCorrect, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - auto *devQueueHw = castToObject>(deviceQueue); - - auto heap = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - - ASSERT_NE(nullptr, heap); - - auto dshBuffer = deviceQueue->getDshBuffer()->getUnderlyingBuffer(); - auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize(); - - auto size = heap->getAvailableSpace(); - auto heapMemory = heap->getCpuBase(); - - // ExecutionModel DSH is offseted by colorCalcState, ParentKernel Interface Descriptor Data is located in first table just after colorCalcState - - EXPECT_EQ(dshBufferSize - DeviceQueue::colorCalcStateSize, size); - EXPECT_EQ(dshBuffer, heapMemory); - EXPECT_EQ(ptrOffset(dshBuffer, DeviceQueue::colorCalcStateSize), heap->getSpace(0)); - - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOffsetIsCorrect, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - auto *devQueueHw = castToObject>(deviceQueue); - - size_t offsetDsh = sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::interfaceDescriptorEntries * DeviceQueue::numberOfIDTables + DeviceQueue::colorCalcStateSize; - - EXPECT_EQ(devQueueHw->getDshOffset(), offsetDsh); - - delete deviceQueue; -} - -class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture, - public ::testing::WithParamInterface> { - public: - void SetUp() override { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); - - ExecutionModelKernelFixture::SetUp(std::get<0>(GetParam()), std::get<1>(GetParam())); - cl_queue_properties properties[5] = { - CL_QUEUE_PROPERTIES, - CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - 0, 0, 0}; - cl_int errcodeRet = 0; - - clDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; - device = &clDevice->getDevice(); - context = new MockContext(); - ASSERT_NE(nullptr, context); - - devQueue = DeviceQueue::create(context, clDevice, - *properties, - errcodeRet); - - ASSERT_NE(nullptr, devQueue); - } - void TearDown() override { - if (IsSkipped()) { - return; - } - - if (devQueue) { - delete devQueue; - } - if (context) { - delete context; - } - if (clDevice) { - delete clDevice; - } - ExecutionModelKernelFixture::TearDown(); - } - - Device *device = nullptr; - ClDevice *clDevice = nullptr; - DeviceQueue *devQueue = nullptr; - MockContext *context = nullptr; -}; - -HWTEST2_P(DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed, DeviceEnqueueSupport) { - EXPECT_TRUE(pKernel->isParentKernel); - - pKernel->createReflectionSurface(); - - auto *devQueueHw = castToObject>(devQueue); - - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); - - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - auto usedBeforeSSH = ssh->getUsed(); - auto usedBeforeDSH = dsh->getUsed(); - - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false); - auto usedAfterSSH = ssh->getUsed(); - auto usedAfterDSH = dsh->getUsed(); - - EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH); - - EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH); - - alignedFree(ssh->getCpuBase()); - delete ssh; -} - -HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet, DeviceEnqueueSupport) { - EXPECT_TRUE(pKernel->isParentKernel); - - pKernel->createReflectionSurface(); - - auto *devQueueHw = castToObject>(devQueue); - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); - - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - - uint32_t parentCount = 4; - - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - - EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID); - - alignedFree(ssh->getCpuBase()); - delete ssh; -} - -HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly, DeviceEnqueueSupport) { - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - - EXPECT_TRUE(pKernel->isParentKernel); - - pKernel->createReflectionSurface(); - - MockContext mockContext; - MockDeviceQueueHw *devQueueHw = new MockDeviceQueueHw(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); - - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - - uint32_t parentCount = 1; - - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - - EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); - EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize()); - EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); - EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh); - - alignedFree(ssh->getCpuBase()); - delete ssh; - delete devQueueHw; -} - -HWTEST2_P(DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled, DeviceEnqueueSupport) { - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - pKernel->createReflectionSurface(); - - MockContext mockContext; - auto devQueueHw = std::make_unique>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - - uint32_t parentCount = 1; - - auto blockManager = pKernel->getProgram()->getBlockKernelManager(); - auto iddCount = blockManager->getCount(); - for (uint32_t i = 0; i < iddCount; i++) { - const_cast(blockManager->getBlockKernelInfo(i)->kernelDescriptor).kernelAttributes.barrierCount = 1U; - } - - auto surfaceStateHeapSize = - HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - - auto iddStartPtr = static_cast(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize)); - auto iddStartIndex = parentCount; - for (uint32_t i = 0; i < iddCount; i++) { - EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable()); - } - - alignedFree(ssh->getCpuBase()); -} - -static const char *binaryFile = "simple_block_kernel"; -static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; - -INSTANTIATE_TEST_CASE_P(DeviceQueueHwWithKernel, - DeviceQueueHwWithKernel, - ::testing::Combine( - ::testing::Values(binaryFile), - ::testing::ValuesIn(KernelNames))); - -struct TheSimplestDeviceQueueFixture : testing::Test { - void SetUp() override { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); - } - void TearDown() override { - } -}; - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenResettingDeviceQueueThenEarlyReturnValuesAreSet, DeviceEnqueueSupport) { - - DebugManagerStateRestore dbgRestorer; - - DebugManager.flags.SchedulerSimulationReturnInstance.set(3); - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context; - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - mockDeviceQueueHw->resetDeviceQueue(); - - EXPECT_EQ(3u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturn); - EXPECT_EQ(0u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturnCounter); -} - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddihMediaStateClearCmdsThenCmdsAreAddedCorrectly, DeviceEnqueueSupport) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context; - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - HardwareParse hwParser; - auto *slbCS = mockDeviceQueueHw->getSlbCS(); - - mockDeviceQueueHw->addMediaStateClearCmds(); - - hwParser.parseCommands(*slbCS, 0); - hwParser.findHardwareCommands(); - - auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); - - if (mockDeviceQueueHw->pipeControlWa) { - pipeControlItor++; - EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); - } - - PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor; - EXPECT_TRUE(pipeControl->getGenericMediaStateClear()); - - auto mediaVfeStateItor = find(pipeControlItor, hwParser.cmdList.end()); - - EXPECT_NE(hwParser.cmdList.end(), mediaVfeStateItor); -} - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddingExecutionModelCleanupSectionThenMediaStateIsCleared, DeviceEnqueueSupport) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; - - class MockDeviceQueueWithMediaStateClearRegistering : public MockDeviceQueueHw { - public: - MockDeviceQueueWithMediaStateClearRegistering(Context *context, - ClDevice *device, - cl_queue_properties &properties) : MockDeviceQueueHw(context, device, properties) { - } - - bool addMediaStateClearCmdsCalled = false; - void addMediaStateClearCmds() override { - addMediaStateClearCmdsCalled = true; - } - }; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context(device.get()); - std::unique_ptr mockDeviceQueueHw(new MockDeviceQueueWithMediaStateClearRegistering(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - std::unique_ptr mockParentKernel(MockParentKernel::create(context)); - uint32_t taskCount = 7; - mockDeviceQueueHw->buildSlbDummyCommands(); - - EXPECT_FALSE(mockDeviceQueueHw->addMediaStateClearCmdsCalled); - mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel.get(), nullptr, 0x123, taskCount); - EXPECT_TRUE(mockDeviceQueueHw->addMediaStateClearCmdsCalled); -} - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingMediaStateClearThenCmdsSizeIsCorrect, DeviceEnqueueSupport) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context; - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - size_t expectedSize = 2 * sizeof(PIPE_CONTROL) + sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE); - EXPECT_EQ(expectedSize, MockDeviceQueueHw::getMediaStateClearCmdsSize()); -} - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingExecutionModelCleanupThenSectionSizeIsCorrect, DeviceEnqueueSupport) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; - using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - using MI_MATH = typename FamilyType::MI_MATH; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context; - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - size_t expectedSize = sizeof(PIPE_CONTROL) + - 2 * sizeof(MI_LOAD_REGISTER_REG) + - sizeof(MI_LOAD_REGISTER_IMM) + - sizeof(PIPE_CONTROL) + - sizeof(MI_MATH) + - NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE); - - expectedSize += MockDeviceQueueHw::getProfilingEndCmdsSize(); - expectedSize += MockDeviceQueueHw::getMediaStateClearCmdsSize(); - - expectedSize += 4 * sizeof(PIPE_CONTROL); - expectedSize += sizeof(MI_BATCH_BUFFER_END); - - EXPECT_EQ(expectedSize, MockDeviceQueueHw::getExecutionModelCleanupSectionSize()); -} - -HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingProfilingEndThenCmdsSizeIsCorrect, DeviceEnqueueSupport) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context; - std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); - - size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM); - - EXPECT_EQ(expectedSize, MockDeviceQueueHw::getProfilingEndCmdsSize()); -} diff --git a/opencl/test/unit_test/device_queue/device_queue_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_tests.cpp deleted file mode 100644 index 41ebc67627..0000000000 --- a/opencl/test/unit_test/device_queue/device_queue_tests.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/device/device_info.h" -#include "shared/test/common/test_macros/matchers.h" - -#include "opencl/source/helpers/dispatch_info.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_program.h" - -using namespace NEO; -using namespace DeviceHostQueue; - -using DeviceQueueSimpleTest = ::testing::Test; - -HWTEST2_F(DeviceQueueSimpleTest, WhenExecutionModelDispatchIsSetupThenNoAdditionalActionsOccur, DeviceEnqueueSupport) { - DeviceQueue devQueue; - char buffer[20]; - - memset(buffer, 1, 20); - - size_t size = 20; - IndirectHeap ssh(buffer, size); - IndirectHeap dsh(buffer, size); - devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0, false); - - EXPECT_EQ(0u, ssh.getUsed()); - - for (uint32_t i = 0; i < 20; i++) { - EXPECT_EQ(1, buffer[i]); - } -} - -HWTEST2_F(DeviceQueueSimpleTest, WhenResettingDeviceQueueThenIndirectHeapIsNotUsed, DeviceEnqueueSupport) { - DeviceQueue devQueue; - devQueue.resetDeviceQueue(); - EXPECT_EQ(nullptr, devQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE)); -} - -class DeviceQueueTest : public DeviceHostQueueFixture { - public: - using BaseClass = DeviceHostQueueFixture; - void SetUp() override { - BaseClass::SetUp(); - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pContext); - device = pContext->getDevice(0); - - ASSERT_NE(device, nullptr); - } - - void TearDown() override { - BaseClass::TearDown(); - } - - void checkQueueBuffer(cl_uint expedtedSize) { - auto alignedExpectedSize = alignUp(expedtedSize, MemoryConstants::pageSize); - EXPECT_EQ(deviceQueue->getQueueSize(), expedtedSize); - ASSERT_NE(deviceQueue->getQueueBuffer(), nullptr); - EXPECT_EQ(deviceQueue->getQueueBuffer()->getUnderlyingBufferSize(), alignedExpectedSize); - } - - DeviceQueue *deviceQueue; - ClDevice *device; -}; - -HWTEST2_F(DeviceQueueTest, GivenDeviceQueueCapWhenCreatingAdditionalDeviceQueuesThenQueueIsNotCreated, DeviceEnqueueSupport) { - auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues; - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = 1; - - auto deviceQueue1 = createQueueObject(); - ASSERT_NE(deviceQueue1, nullptr); - EXPECT_EQ(deviceQueue1->getReference(), 1); - - auto deviceQueue2 = createQueueObject(); - EXPECT_EQ(deviceQueue2, nullptr); - - delete deviceQueue1; - - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; -} - -HWTEST2_F(DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) { - auto timestampResolution = static_cast(device->getProfilingTimerResolution()); - - auto deviceQueue = std::unique_ptr(createQueueObject()); - ASSERT_NE(deviceQueue, nullptr); - - auto eventPoolBuffer = reinterpret_cast(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer()); - - EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution); -} - -typedef DeviceQueueTest DeviceQueueBuffer; - -HWTEST2_F(DeviceQueueBuffer, GivenNoPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) { - auto &deviceInfo = device->getDeviceInfo(); - deviceQueue = createQueueObject(); // only minimal properties - ASSERT_NE(deviceQueue, nullptr); - checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize); - deviceQueue->release(); -} - -HWTEST2_F(DeviceQueueBuffer, GivenInvalidPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) { - cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1], - CL_QUEUE_SIZE, 0, 0}; - auto &deviceInfo = device->getDeviceInfo(); - - deviceQueue = createQueueObject(properties); // zero size - ASSERT_NE(deviceQueue, nullptr); - - checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize); - delete deviceQueue; - - properties[3] = static_cast(deviceInfo.queueOnDeviceMaxSize + 1); - deviceQueue = createQueueObject(properties); // greater than max - EXPECT_EQ(deviceQueue, nullptr); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueBuffer, GivenValidSizeWhenCreatingQueueThenProvidedSizeIsSet, DeviceEnqueueSupport) { - auto &deviceInfo = device->getDeviceInfo(); - cl_uint validSize = deviceInfo.queueOnDevicePreferredSize - 1; - cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1], - CL_QUEUE_SIZE, static_cast(validSize), - 0}; - - EXPECT_NE(validSize, alignUp(validSize, MemoryConstants::pageSize)); // create aligned - deviceQueue = createQueueObject(properties); - ASSERT_NE(deviceQueue, nullptr); - - checkQueueBuffer(validSize); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueBuffer, WhenDeviceQueueIsCreatedThenItIsCorrectlyInitialized, DeviceEnqueueSupport) { - auto &deviceInfo = device->getDeviceInfo(); - - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - IGIL_CommandQueue expectedIgilCmdQueue = getExpectedInitIgilCmdQueue(deviceQueue); - EXPECT_EQ(static_cast(deviceQueue->isProfilingEnabled()), expectedIgilCmdQueue.m_controls.m_IsProfilingEnabled); - - IGIL_EventPool expectedIgilEventPool = {0, 0, 0}; - expectedIgilEventPool.m_head = 0; - expectedIgilEventPool.m_size = deviceInfo.maxOnDeviceEvents; - expectedIgilEventPool.m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); - - // initialized header - EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(), - &expectedIgilCmdQueue, sizeof(IGIL_CommandQueue))); - - EXPECT_EQ(0, memcmp(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer(), - &expectedIgilEventPool, sizeof(IGIL_EventPool))); - - delete deviceQueue; -} - -typedef DeviceQueueTest DeviceQueueStackBuffer; - -HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenAllocatedResourcesAreZeroed, DeviceEnqueueSupport) { - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - EXPECT_THAT(deviceQueue->getQueueStorageBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize())); - EXPECT_THAT(deviceQueue->getStackBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getStackBuffer()->getUnderlyingBufferSize())); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenStackBufferIsAllocated, DeviceEnqueueSupport) { - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - auto maxEnqueue = deviceQueue->getQueueSize() / sizeof(IGIL_CommandHeader); - //stack can hold at most 3 full loads of commands - auto expectedStackSize = maxEnqueue * sizeof(uint32_t) * 3; - expectedStackSize = alignUp(expectedStackSize, MemoryConstants::pageSize); - - ASSERT_NE(deviceQueue->getStackBuffer(), nullptr); - EXPECT_EQ(deviceQueue->getStackBuffer()->getUnderlyingBufferSize(), expectedStackSize); - delete deviceQueue; -} - -typedef DeviceQueueTest DeviceQueueStorageBuffer; - -HWTEST2_F(DeviceQueueStorageBuffer, WhenDeviceQueueIsCreatedThenStorageBufferIsAllocated, DeviceEnqueueSupport) { - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - auto expectedStorageSize = deviceQueue->getQueueBuffer()->getUnderlyingBufferSize() * 2; - expectedStorageSize = alignUp(expectedStorageSize, MemoryConstants::pageSize); - - ASSERT_NE(deviceQueue->getQueueStorageBuffer(), nullptr); - EXPECT_EQ(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize(), expectedStorageSize); - delete deviceQueue; -} - -typedef DeviceQueueTest DefaultDeviceQueue; - -HWTEST2_F(DefaultDeviceQueue, GivenSingleDeviceQueueIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) { - cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0}; - - auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues; - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = 1; - - auto deviceQueue1 = createQueueObject(properties); - ASSERT_NE(deviceQueue1, nullptr); - - EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); - EXPECT_EQ(deviceQueue1->getReference(), 1); - - auto deviceQueue2 = createQueueObject(properties); - ASSERT_NE(deviceQueue2, nullptr); - - EXPECT_EQ(deviceQueue2, deviceQueue1); - - EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); - EXPECT_EQ(deviceQueue1->getReference(), 2); - - deviceQueue1->release(); - deviceQueue2->release(); - - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; -} - -HWTEST2_F(DefaultDeviceQueue, GivenMultipleDeviceQueuesIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) { - cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0}; - - auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues; - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = 2; - - auto deviceQueue1 = createQueueObject(properties); - ASSERT_NE(deviceQueue1, nullptr); - - EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); - EXPECT_EQ(deviceQueue1->getReference(), 1); - - auto deviceQueue2 = createQueueObject(properties); - ASSERT_NE(deviceQueue2, nullptr); - - EXPECT_EQ(deviceQueue2, deviceQueue1); - - EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); - EXPECT_EQ(deviceQueue1->getReference(), 2); - - deviceQueue1->release(); - deviceQueue2->release(); - - const_cast(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; -} - -typedef DeviceQueueTest DeviceQueueEventPool; - -HWTEST2_F(DeviceQueueEventPool, WhenDeviceQueueIsCreatedThenEventPoolBufferIsAllocated, DeviceEnqueueSupport) { - auto &deviceInfo = device->getDeviceInfo(); - - // number of events + event pool representation - auto expectedSize = static_cast(deviceInfo.maxOnDeviceEvents * sizeof(IGIL_DeviceEvent) + - sizeof(IGIL_EventPool)); - expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); - - auto deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - ASSERT_NE(deviceQueue->getEventPoolBuffer(), nullptr); - EXPECT_EQ(deviceQueue->getEventPoolBuffer()->getUnderlyingBufferSize(), expectedSize); - - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBufferIsAllocated, DeviceEnqueueSupport) { - deviceQueue = createQueueObject(); - ASSERT_NE(deviceQueue, nullptr); - - ASSERT_NE(deviceQueue->getDshBuffer(), nullptr); - auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize(); - - EXPECT_LE(761856u, dshBufferSize); - delete deviceQueue; -} - -HWTEST2_F(DeviceQueueTest, WhenDispatchingSchedulerThenNoAssertsOccur, DeviceEnqueueSupport) { - DeviceQueue devQueue; - MockProgram program(toClDeviceVector(*device)); - MockCommandQueue cmdQ(nullptr, nullptr, 0, false); - KernelInfo info; - MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device); - LinearStream cmdStream; - - devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false); - delete kernel; -} diff --git a/opencl/test/unit_test/device_queue/get_device_queue_info_tests.cpp b/opencl/test/unit_test/device_queue/get_device_queue_info_tests.cpp deleted file mode 100644 index 57ffc43f17..0000000000 --- a/opencl/test/unit_test/device_queue/get_device_queue_info_tests.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/context/context.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" - -using namespace NEO; -using namespace DeviceHostQueue; - -class GetDeviceQueueInfoTest : public DeviceHostQueueFixture { - public: - using BaseClass = DeviceHostQueueFixture; - - void SetUp() override { - BaseClass::SetUp(); - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pContext); - deviceQueue = createQueueObject(deviceQueueProperties::allProperties); - ASSERT_NE(deviceQueue, nullptr); - } - - void TearDown() override { - if (deviceQueue) - delete deviceQueue; - BaseClass::TearDown(); - } - - DeviceQueue *deviceQueue = nullptr; -}; - -HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueContextWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_context contextReturned = nullptr; - - retVal = deviceQueue->getCommandQueueInfo( - CL_QUEUE_CONTEXT, - sizeof(contextReturned), - &contextReturned, - nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ((cl_context)pContext, contextReturned); -} - -HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_device_id deviceExpected = testedClDevice; - cl_device_id deviceIdReturned = nullptr; - - retVal = deviceQueue->getCommandQueueInfo( - CL_QUEUE_DEVICE, - sizeof(deviceIdReturned), - &deviceIdReturned, - nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(deviceExpected, deviceIdReturned); -} - -HWTEST2_F(GetDeviceQueueInfoTest, GivenQueuePropertiesWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_command_queue_properties propertiesReturned = 0; - - retVal = deviceQueue->getCommandQueueInfo( - CL_QUEUE_PROPERTIES, - sizeof(propertiesReturned), - &propertiesReturned, - nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(deviceQueueProperties::allProperties[1], propertiesReturned); -} - -HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueSizeWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_uint queueSizeReturned = 0; - - retVal = deviceQueue->getCommandQueueInfo( - CL_QUEUE_SIZE, - sizeof(queueSizeReturned), - &queueSizeReturned, - nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(deviceQueue->getQueueSize(), queueSizeReturned); -} - -// OCL 2.1 -HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceDefaultWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) { - cl_command_queue commandQueueReturned = nullptr; - - retVal = deviceQueue->getCommandQueueInfo( - CL_QUEUE_DEVICE_DEFAULT, - sizeof(commandQueueReturned), - &commandQueueReturned, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - - // 1 device queue is supported which is default - EXPECT_EQ(deviceQueue, commandQueueReturned); -} - -HWTEST2_F(GetDeviceQueueInfoTest, WhenGettingDeviceQueueInfoThenProfilingIsEnabled, DeviceEnqueueSupport) { - EXPECT_TRUE(deviceQueue->isProfilingEnabled()); -} - -HWTEST2_F(GetDeviceQueueInfoTest, GivenInvalidParamWhenGettingDeviceQueueInfoThenInvalidValueErrorIsReturned, DeviceEnqueueSupport) { - uint32_t tempValue = 0; - - retVal = deviceQueue->getCommandQueueInfo( - static_cast(0), - sizeof(tempValue), - &tempValue, - nullptr); - EXPECT_EQ(tempValue, 0u); - EXPECT_EQ(CL_INVALID_VALUE, retVal); -} diff --git a/opencl/test/unit_test/execution_model/CMakeLists.txt b/opencl/test/unit_test/execution_model/CMakeLists.txt deleted file mode 100644 index 9964ca0214..0000000000 --- a/opencl/test/unit_test/execution_model/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# -# Copyright (C) 2018-2021 Intel Corporation -# -# SPDX-License-Identifier: MIT -# - -set(IGDRCL_SRCS_tests_execution_model - ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_execution_model_kernel_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_dispatch_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_dispatch_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/submit_blocked_parent_kernel_tests.cpp -) -target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_execution_model}) diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp deleted file mode 100644 index 2feb7cd928..0000000000 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/engine_node_helper.h" -#include "shared/source/helpers/local_id_gen.h" -#include "shared/source/helpers/per_thread_data.h" -#include "shared/test/common/cmd_parse/hw_parse.h" -#include "shared/test/common/helpers/debug_manager_state_restore.h" -#include "shared/test/common/mocks/mock_allocation_properties.h" -#include "shared/test/common/mocks/mock_csr.h" -#include "shared/test/common/mocks/mock_submissions_aggregator.h" -#include "shared/test/unit_test/utilities/base_object_utils.h" - -#include "opencl/source/built_ins/builtins_dispatch_builder.h" -#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" -#include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/device_queue/device_queue_hw.h" -#include "opencl/source/event/user_event.h" -#include "opencl/source/kernel/kernel.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/helpers/gtest_helpers.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" -#include "opencl/test/unit_test/mocks/mock_event.h" -#include "opencl/test/unit_test/mocks/mock_mdi.h" - -using namespace NEO; - -static const char *binaryFile = "simple_block_kernel"; -static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; - -typedef ExecutionModelKernelTest ParentKernelEnqueueTest; - -HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - pKernel->createReflectionSurface(); - - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); - void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer(); - - EXPECT_NE(nullptr, executionModelDsh); - - INTERFACE_DESCRIPTOR_DATA *idData = static_cast(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize)); - - size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; - EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); - auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); - - auto &hardwareInfo = pClDevice->getHardwareInfo(); - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - - if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelIsaAddress += pKernel->getKernelInfo().kernelDescriptor.entryPoints.skipSetFFIDGP; - } - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { - if (EncodeSurfaceState::doBindingTablePrefetch()) { - EXPECT_NE(0u, idData[0].getSamplerCount()); - } else { - EXPECT_EQ(0u, idData[0].getSamplerCount()); - } - EXPECT_NE(0u, idData[0].getSamplerStatePointer()); - } - - EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength()); - EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength()); - EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode()); - EXPECT_EQ(static_cast(kernelIsaAddress), idData[0].getKernelStartPointer()); - EXPECT_EQ(static_cast(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh()); - - const uint32_t blockFirstIndex = 1; - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - - ASSERT_NE(nullptr, pBlockInfo); - - auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize; - - const uint32_t sizeCrossThreadData = pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize / grfSize; - - auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels; - auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), grfSize, numChannels); - uint32_t numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); - numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); - - EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength()); - EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength()); - EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer()); - - uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer(); - uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); - - auto &hardwareInfo = pClDevice->getHardwareInfo(); - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - - if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - expectedBlockKernelAddress += pBlockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP; - } - - EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress); - } -} - -HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident, DeviceEnqueueSupport) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - int32_t executionStamp = 0; - auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(mockCSR); - - size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); - for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) { - kernelRequiringPrivateSurface = i; - break; - } - } - - ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); - - GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); - - if (privateSurface == nullptr) { - privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface); - } - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId())); -} - -HWTEST2_P(ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; - - size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); - for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) { - kernelRequiringPrivateSurface = i; - break; - } - } - - ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); - - auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); - - if (privateAllocation == nullptr) { - privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); - blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface); - } - - auto uEvent = make_releaseable(pContext); - auto clEvent = static_cast(uEvent.get()); - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); - - EXPECT_FALSE(csr.isMadeResident(privateAllocation)); - uEvent->setStatus(CL_COMPLETE); - EXPECT_TRUE(csr.isMadeResident(privateAllocation)); -} - -HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident, DeviceEnqueueSupport) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } -} - -HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident, DeviceEnqueueSupport) { - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; - - blockKernelManager->makeInternalAllocationsResident(csr); - - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } -} - -HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; - - auto uEvent = make_releaseable(pContext); - auto clEvent = static_cast(uEvent.get()); - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); - - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } - - uEvent->setStatus(CL_COMPLETE); - - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } -} - -HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - size_t executionModelDSHUsedBefore = dsh->getUsed(); - - uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; - EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - size_t executionModelDSHUsedAfterFirst = dsh->getUsed(); - EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst); - - pDevQueueHw->resetDeviceQueue(); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - size_t executionModelDSHUsedAfterSecond = dsh->getUsed(); - EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond); -} - -HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated, DeviceEnqueueSupport) { - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - pKernel->createReflectionSurface(); - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace()); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - auto ssh2 = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - EXPECT_EQ(ssh, ssh2); - EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation()); -} - -HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied, DeviceEnqueueSupport) { - using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - pKernel->createReflectionSurface(); - - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - // prealign the ssh so that it won't need to be realigned in enqueueKernel - // this way, we can assume the location in memory into which the surface states - // will be coies - ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - // mark the assumed place for surface states - size_t parentSshOffset = 0; - ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - - void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - - ASSERT_NE(nullptr, pBlockInfo); - - Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, *pClDevice, nullptr); - blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - if (blockKernel->getNumberOfBindingTableStates() > 0) { - ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset)); - auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); - EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); - auto dstBindingTable = reinterpret_cast(dstBlockBti); - - auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); - auto srcBindingTable = reinterpret_cast(srcBlockBti); - for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { - uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); - uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); - auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); - auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); - EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); - } - - blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); - } - - delete blockKernel; - } -} - -HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenReflectionSurfaceIsCreated, DeviceEnqueueSupport) { - using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface()); -} - -HWTEST2_P(ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset, DeviceEnqueueSupport) { - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - cl_queue_properties properties[3] = {0}; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - MockDeviceQueueHw mockDevQueue(context, pClDevice, properties[0]); - - context->setDefaultDeviceQueue(&mockDevQueue); - // Acquire CS to check if reset queue was called - mockDevQueue.acquireEMCriticalSection(); - - auto mockEvent = make_releaseable(context); - - cl_event eventBlocking = mockEvent.get(); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr); - - EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); -} - -HWTEST2_P(ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident, DeviceEnqueueSupport) { - typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; - - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - ASSERT_NE(nullptr, pDevQueueHw); - - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - int32_t executionStamp = 0; - auto mockCSR = new MockCsrBase(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(mockCSR); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - auto &cmdStream = mockCSR->getCS(0); - - HardwareParse hwParser; - hwParser.parseCommands(cmdStream, 0); - hwParser.findHardwareCommands(); - - auto stateBaseAddressItor = hwParser.itorStateBaseAddress; - - ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor); - - auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor; - - uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress(); - - EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress()); - - bool dshAllocationResident = false; - - for (auto allocation : mockCSR->madeResidentGfxAllocations) { - if (allocation == pDevQueue->getDshBuffer()) { - dshAllocationResident = true; - break; - } - } - EXPECT_TRUE(dshAllocationResident); -} - -INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest, - ParentKernelEnqueueTest, - ::testing::Combine( - ::testing::Values(binaryFile), - ::testing::ValuesIn(KernelNames))); - -class ParentKernelEnqueueFixture : public ExecutionModelSchedulerTest, - public testing::Test { - public: - void SetUp() override { - ExecutionModelSchedulerTest::SetUp(); - } - - void TearDown() override { - ExecutionModelSchedulerTest::TearDown(); - } -}; - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenDefaultDeviceQueueAndEventPoolIsPatched, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - const auto &implicitArgs = parentKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs; - - const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; - if (isValidOffset(defaultQueueSurfaceAddress.stateless)) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), defaultQueueSurfaceAddress.stateless); - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation); - } - - const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - if (isValidOffset(eventPoolSurfaceAddress.stateless)) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), eventPoolSurfaceAddress.stateless); - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation); - } - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenBlocksDSHOnReflectionSurfaceArePatchedWithDeviceQueueAndEventPoolAddresses, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - void *reflectionSurface = parentKernel->getKernelReflectionSurface()->getUnderlyingBuffer(); - - BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - for (uint32_t i = 0; i < blockCount; i++) { - const auto implicitArgs = blockManager->getBlockKernelInfo(i)->kernelDescriptor.payloadMappings.implicitArgs; - const uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i); - - const auto &defaultQueue = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; - if (defaultQueue.pointerSize == sizeof(uint64_t)) { - EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless)); - } else { - EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless)); - } - - const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) { - EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless)); - } else { - EXPECT_EQ((uint32_t)pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless)); - } - } - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenDeviceQueueCriticalSetionIsAcquired, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_FALSE(pDevQueueHw->isEMCriticalSectionFree()); - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToBlockedQueueThenDeviceQueueCriticalSetionIsNotAcquired, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - auto mockEvent = make_releaseable>(context); - cl_event eventBlocking = mockEvent.get(); - - EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 1, &eventBlocking, nullptr); - - EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); - mockEvent->setStatus(-1); - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenFlushCsrWithSlm, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - int32_t execStamp; - auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(mockCsr); - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(mockCsr->slmUsedInLastFlushTask); - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedWithSchedulerReturnInstanceThenRunSimulation, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - - DebugManagerStateRestore dbgRestorer; - DebugManager.flags.SchedulerSimulationReturnInstance.set(1); - - MockDeviceQueueHw *mockDeviceQueueHw = new MockDeviceQueueHw(context, pClDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); - mockDeviceQueueHw->resetDeviceQueue(); - - context->setDefaultDeviceQueue(mockDeviceQueueHw); - - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - int32_t execStamp; - auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - - BuiltinKernelsSimulation::SchedulerSimulation::enabled = false; - - pDevice->resetCommandStreamReceiver(mockCsr); - - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - BuiltinKernelsSimulation::SchedulerSimulation::enabled = true; - - EXPECT_TRUE(BuiltinKernelsSimulation::SchedulerSimulation::simulationRun); - delete mockDeviceQueueHw; - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKernelIsSubmittedThenItIsFlushed, DeviceEnqueueSupport) { - if (pClDevice->areOcl21FeaturesSupported()) { - auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); - pDevice->resetCommandStreamReceiver(mockCsr); - - auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); - mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); - - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - MockContext context(pClDevice); - MockParentKernel::CreateParams createParams{}; - std::unique_ptr kernelToRun(MockParentKernel::create(context, createParams)); - - pCmdQ->enqueueKernel(kernelToRun.get(), 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); - EXPECT_EQ(1, mockCsr->flushCalledCount); - } -} - -HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenMarkCsrMediaVfeStateDirty, DeviceEnqueueSupport) { - - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - int32_t execStamp; - auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(mockCsr); - - mockCsr->setMediaVFEStateDirty(false); - pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(mockCsr->peekMediaVfeStateDirty()); - } -} diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp deleted file mode 100644 index f4595b667a..0000000000 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/utilities/perf_counter.h" -#include "shared/test/common/cmd_parse/hw_parse.h" -#include "shared/test/common/helpers/debug_manager_state_restore.h" -#include "shared/test/common/helpers/unit_test_helper.h" - -#include "opencl/source/command_queue/enqueue_kernel.h" -#include "opencl/source/command_queue/hardware_interface.h" -#include "opencl/source/kernel/kernel.h" -#include "opencl/source/sampler/sampler.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_mdi.h" -#include "opencl/test/unit_test/mocks/mock_program.h" - -using namespace NEO; - -static const char *binaryFile = "simple_block_kernel"; -static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; - -typedef ExecutionModelKernelTest ParentKernelDispatchTest; - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - pKernel->createReflectionSurface(); - - size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); - EXPECT_EQ(0u, dshUsedBefore); - - size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - - DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo(pKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); - EXPECT_EQ(0u, dshUsedAfter); - - size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter); -} - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - auto ish = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - - EXPECT_EQ(ishOfDevQueue, ish); -} - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - auto ioh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - - EXPECT_EQ(dshOfDevQueue, ioh); -} - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) { - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - - auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); - - DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - auto iohUsed = ioh.getUsed(); - EXPECT_EQ(0u, iohUsed); -} - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) { - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); - - EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); - - size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); - - EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); -} - -HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) { - using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - - auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - MultiDispatchInfo multiDispatchInfo(pKernel); - - DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData.get(), - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - ASSERT_NE(nullptr, blockedCommandsData); - - size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); - - size_t sshUsed = blockedCommandsData->ssh->getUsed(); - - size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + - pKernel->getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) + - UnitTestHelper::getDefaultSshUsage(); - - if (pKernel->usesBindfulAddressingForBuffers() || pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesImages) { - EXPECT_EQ(expectedSizeSSH, sshUsed); - } - - EXPECT_GE(minRequiredSize, sshUsed); - // Total SSH size including EM must be greater then ssh allocated - EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed); -} - -INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest, - ParentKernelDispatchTest, - ::testing::Combine( - ::testing::Values(binaryFile), - ::testing::ValuesIn(KernelNames))); - -typedef ParentKernelCommandQueueFixture ParentKernelCommandStreamFixture; - -HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhenCommandStreamIsAcquiredThenSizeAccountsForSchedulerDispatch) { - REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - - MockParentKernel *mockParentKernel = MockParentKernel::create(*context); - - DispatchInfo dispatchInfo(device, mockParentKernel, 1, Vec3{24, 1, 1}, Vec3{24, 1, 1}, Vec3{0, 0, 0}); - MultiDispatchInfo multiDispatchInfo(mockParentKernel); - - size_t size = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel, {}); - size_t numOfKernels = MemoryConstants::pageSize / size; - - size_t rest = MemoryConstants::pageSize - (numOfKernels * size); - - SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); - size_t schedulerSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler, {}); - - while (rest >= schedulerSize) { - numOfKernels++; - rest = alignUp(numOfKernels * size, MemoryConstants::pageSize) - numOfKernels * size; - } - - for (size_t i = 0; i < numOfKernels; i++) { - multiDispatchInfo.push(dispatchInfo); - } - - size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize); - - LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), false, false, - false, multiDispatchInfo, nullptr, 0, false, false); - - EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace()); - - delete mockParentKernel; -} - -class MockParentKernelDispatch : public ExecutionModelSchedulerTest, - public testing::Test { - public: - void SetUp() override { - DebugManager.flags.EnableTimestampPacket.set(0); - ExecutionModelSchedulerTest::SetUp(); - } - - void TearDown() override { - ExecutionModelSchedulerTest::TearDown(); - } - - std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { - auto commandStream = new LinearStream(); - - auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); - gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); - - return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); - } - - DebugManagerStateRestore dbgRestore; -}; - -HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) { - - if (pClDevice->areOcl21FeaturesSupported()) { - MockParentKernel *mockParentKernel = MockParentKernel::create(*context); - - auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo(mockParentKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData.get(), - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - ASSERT_NE(nullptr, blockedCommandsData); - - EXPECT_EQ(blockedCommandsData->dsh.get(), blockedCommandsData->ioh.get()); - delete mockParentKernel; - } -} - -HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispatchedThenMediaInterfaceDescriptorLoadIsCorrectlyProgrammed) { - typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; - typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; - - if (pClDevice->areOcl21FeaturesSupported()) { - MockParentKernel *mockParentKernel = MockParentKernel::create(*context); - - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo(mockParentKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - LinearStream *commandStream = &pCmdQ->getCS(0); - - HardwareParse hwParser; - hwParser.parseCommands(*commandStream, 0); - hwParser.findHardwareCommands(); - - ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad); - - auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)hwParser.getCommand(hwParser.cmdList.begin(), hwParser.itorWalker); - - ASSERT_NE(nullptr, pCmd); - - uint32_t offsetInterfaceDescriptorData = DeviceQueue::colorCalcStateSize; - uint32_t sizeInterfaceDescriptorData = sizeof(INTERFACE_DESCRIPTOR_DATA); - - EXPECT_EQ(offsetInterfaceDescriptorData, pCmd->getInterfaceDescriptorDataStartAddress()); - EXPECT_EQ(sizeInterfaceDescriptorData, pCmd->getInterfaceDescriptorTotalLength()); - - delete mockParentKernel; - } -} - -HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedThenNewSSHIsAllocated) { - - if (pClDevice->areOcl21FeaturesSupported()) { - MockParentKernel *mockParentKernel = MockParentKernel::create(*context); - - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); - - uint32_t testSshUse = 20u; - uint32_t expectedSshUse = testSshUse + UnitTestHelper::getDefaultSshUsage(); - ssh.getSpace(testSshUse); - EXPECT_EQ(expectedSshUse, ssh.getUsed()); - - // Assuming parent is not using SSH, this is becuase storing allocation on reuse list and allocating - // new one by obtaining from reuse list returns the same allocation and heap buffer does not differ - // If parent is not using SSH, then heap obtained has zero usage and the same buffer - ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize); - - DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo(mockParentKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - EXPECT_EQ(0u, ssh.getUsed()); - - delete mockParentKernel; - } -} - -HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatchedThenExistingSSHIsUsed) { - - if (pClDevice->areOcl21FeaturesSupported()) { - MockParentKernel *mockParentKernel = MockParentKernel::create(*context); - - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); - auto defaultSshUsage = UnitTestHelper::getDefaultSshUsage(); - EXPECT_EQ(defaultSshUsage, ssh.getUsed()); - - auto *bufferMemory = ssh.getCpuBase(); - - DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo; - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - EXPECT_EQ(bufferMemory, ssh.getCpuBase()); - - delete mockParentKernel; - } -} diff --git a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp deleted file mode 100644 index c8b4a6f045..0000000000 --- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/built_ins/built_ins.h" -#include "shared/test/common/cmd_parse/hw_parse.h" -#include "shared/test/common/helpers/debug_manager_state_restore.h" - -#include "opencl/source/command_queue/enqueue_kernel.h" -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/source/scheduler/scheduler_kernel.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_program.h" - -using namespace NEO; - -class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest, - public testing::Test { - public: - void SetUp() override { - ExecutionModelSchedulerTest::SetUp(); - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice); - } - - void TearDown() override { - ExecutionModelSchedulerTest::TearDown(); - } -}; - -HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenProgrammingIsCorrect, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; - using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - auto &scheduler = static_cast(context->getSchedulerKernel()); - - auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); - auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer(); - - EXPECT_NE(nullptr, executionModelDsh); - - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - // Setup heaps in pCmdQ - MultiDispatchInfo multiDispatchinfo(&scheduler); - LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), - false, false, false, multiDispatchinfo, - nullptr, 0, false, false); - pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); - - GpgpuWalkerHelper::dispatchScheduler( - pCmdQ->getCS(0), - *pDevQueueHw, - pDevice->getPreemptionMode(), - scheduler, - &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), - pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), - false); - - auto localWorkSize = scheduler.getLocalWorkSizeValues(); - EXPECT_EQ((uint32_t)scheduler.getLws(), *localWorkSize[0]); - EXPECT_EQ(1u, *localWorkSize[1]); - EXPECT_EQ(1u, *localWorkSize[2]); - - auto enqueuedLocalWorkSize = scheduler.getEnqueuedLocalWorkSizeValues(); - if (enqueuedLocalWorkSize[0] != &Kernel::dummyPatchLocation) { - EXPECT_EQ((uint32_t)scheduler.getLws(), *enqueuedLocalWorkSize[0]); - EXPECT_EQ(1u, *enqueuedLocalWorkSize[1]); - EXPECT_EQ(1u, *enqueuedLocalWorkSize[2]); - } - - auto numWorkGroups = scheduler.getNumWorkGroupsValues(); - EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *numWorkGroups[0]); - EXPECT_EQ(0u, *numWorkGroups[1]); - EXPECT_EQ(0u, *numWorkGroups[2]); - - HardwareParse hwParser; - hwParser.parseCommands(commandStream, 0); - hwParser.findHardwareCommands(); - - ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker); - - // Before Walker There must be PC - PIPE_CONTROL *pc = hwParser.getCommand(hwParser.cmdList.begin(), hwParser.itorWalker); - ASSERT_NE(nullptr, pc); - - ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad); - auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad; - - uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress(); - uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength(); - - uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize; - uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA); - - EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed); - EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed); - - auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker; - - size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; - - size_t numWorkgroupsProgrammed[3] = {0, 0, 0}; - - uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum(); - - EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup); - - numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension(); - numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension(); - numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension(); - - EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]); - EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]); - EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]); - - typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize(); - EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize); - - EXPECT_EQ(0u, walker->getThreadGroupIdStartingX()); - EXPECT_EQ(0u, walker->getThreadGroupIdStartingY()); - EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ()); - - uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress(); - assert(offsetCrossThreadDataProgrammed % 64 == 0); - size_t curbeSize = scheduler.getCurbeSize(); - size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account - EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed); - - EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset()); - - auto numChannels = 3; - auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize; - auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); - - auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(); - auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); - EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength()); - - ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker); - auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker; - - uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress(); - EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddress()); -} - -HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenStandardCmdqIohIsNotUsed, DeviceEnqueueSupport) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; - using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - SchedulerKernel &scheduler = context->getSchedulerKernel(); - - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - // Setup heaps in pCmdQ - - MultiDispatchInfo multiDispatchinfo(&scheduler); - getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo, - nullptr, 0, false, false); - pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); - - GpgpuWalkerHelper::dispatchScheduler( - pCmdQ->getCS(0), - *pDevQueueHw, - pDevice->getPreemptionMode(), - scheduler, - &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), - pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), - false); - - auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); - - EXPECT_EQ(0u, ioh.getUsed()); -} - -HWTEST2_F(ParentKernelCommandQueueFixture, GivenEarlyReturnSetToFirstInstanceWhenDispatchingSchedulerThenBbStartCmdIsNotInserted, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockDeviceQueueHw mockDevQueue(context, device, properties[0]); - - auto *igilQueue = mockDevQueue.getIgilQueue(); - - ASSERT_NE(nullptr, igilQueue); - igilQueue->m_controls.m_SchedulerEarlyReturn = 1; - - SchedulerKernel &scheduler = context->getSchedulerKernel(); - - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler); - // Setup heaps in pCmdQ - MultiDispatchInfo multiDispatchinfo(&scheduler); - LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), - false, false, false, multiDispatchinfo, - nullptr, 0, false, false); - pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); - - GpgpuWalkerHelper::dispatchScheduler( - pCmdQ->getCS(0), - mockDevQueue, - device->getPreemptionMode(), - scheduler, - &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), - mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE), - false); - - HardwareParse hwParser; - hwParser.parseCommands(commandStream, 0); - hwParser.findHardwareCommands(); - - EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker); - EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker); -} - -HWTEST2_F(ExecutionModelSchedulerFixture, WhenForceDispatchingSchedulerThenSchedulerKernelIsEnqueued, DeviceEnqueueSupport) { - - DebugManagerStateRestore dbgRestorer; - - DebugManager.flags.ForceDispatchScheduler.set(true); - - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - MockCommandQueueHw *mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); - - mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel); - - delete mockCmdQ; -} diff --git a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp deleted file mode 100644 index 8dc6cfd553..0000000000 --- a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/utilities/hw_timestamps.h" -#include "shared/source/utilities/tag_allocator.h" - -#include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/command_queue/hardware_interface.h" -#include "opencl/source/helpers/hardware_commands_helper.h" -#include "opencl/source/helpers/task_information.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" -#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" - -#include - -using namespace NEO; - -class SubmitBlockedParentKernelFixture : public ExecutionModelSchedulerTest, - public testing::Test { - void SetUp() override { - ExecutionModelSchedulerTest::SetUp(); - } - - void TearDown() override { - ExecutionModelSchedulerTest::TearDown(); - } -}; - -template -class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw { - using BaseClass = DeviceQueueHw; - - public: - MockDeviceQueueHwWithCriticalSectionRelease(Context *context, - ClDevice *device, - cl_queue_properties &properties) : BaseClass(context, device, properties) {} - - bool isEMCriticalSectionFree() override { - auto igilCmdQueue = reinterpret_cast(DeviceQueue::queueBuffer->getUnderlyingBuffer()); - - criticalSectioncheckCounter++; - - if (criticalSectioncheckCounter == maxCounter) { - igilCmdQueue->m_controls.m_CriticalSection = DeviceQueueHw::ExecutionModelCriticalSection::Free; - return true; - } - - return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw::ExecutionModelCriticalSection::Free; - } - - void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override { - indirectStateSetup = true; - return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount, isCcsUsed); - } - void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override { - cleanupSectionAdded = true; - - auto hwTimestampT = static_cast *>(hwTimeStamp); - - timestampAddedInCleanupSection = hwTimestampT ? hwTimestampT->tagForCpuAccess : nullptr; - return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount); - } - void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override { - schedulerDispatched = true; - return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed); - } - - uint32_t criticalSectioncheckCounter = 0; - const uint32_t maxCounter = 10; - bool indirectStateSetup = false; - bool cleanupSectionAdded = false; - bool schedulerDispatched = false; - HwTimeStamps *timestampAddedInCleanupSection = nullptr; -}; - -HWTEST2_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - mockDevQueue.acquireEMCriticalSection(); - - size_t heapSize = 20; - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - - dsh->getSpace(mockDevQueue.getDshOffset()); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter); - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - - size_t heapSize = 20; - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - - // add initial offset of colorCalState - dsh->getSpace(DeviceQueue::colorCalcStateSize); - - uint64_t ValueToFillDsh = 5; - uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t)); - - // Fill Interface Descriptor Data - *dshVal = ValueToFillDsh; - - // Move to parent DSH Offset - size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t); - dsh->getSpace(alignToOffsetDshSize); - - // Fill with pattern - dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t)); - *dshVal = ValueToFillDsh; - - size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed(); - - uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize; - EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - //device queue dsh is not changed - size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed(); - EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - size_t heapSize = 20; - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - - dsh->getSpace(mockDevQueue.getDshOffset()); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - EXPECT_TRUE(mockDevQueue.indirectStateSetup); - EXPECT_TRUE(mockDevQueue.cleanupSectionAdded); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - size_t heapSize = 20; - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - dsh->getSpace(mockDevQueue.getDshOffset()); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - auto timestamp = static_cast *>(pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag()); - cmdComputeKernel->timestamp = timestamp; - cmdComputeKernel->submit(0, false); - - EXPECT_TRUE(mockDevQueue.cleanupSectionAdded); - EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - size_t heapSize = 20; - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - dsh->getSpace(mockDevQueue.getDshOffset()); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - EXPECT_TRUE(mockDevQueue.schedulerDispatched); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapsWhenParentKernelIsSubmittedThenQueueHeapsAreNotUsed, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHw mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - MockCommandQueue cmdQ(context, device, properties, false); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - size_t heapSize = 20; - - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); - - dsh->getSpace(mockDevQueue.getDshOffset()); - - auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); - auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100); - auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100); - - size_t usedSize = 4u; - - queueSsh.getSpace(usedSize); - queueDsh.getSpace(usedSize); - queueIoh.getSpace(usedSize); - - auto intialSshUsed = queueSsh.getUsed(); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled); - EXPECT_EQ(usedSize, queueDsh.getUsed()); - EXPECT_EQ(usedSize, queueIoh.getUsed()); - EXPECT_EQ(intialSshUsed, queueSsh.getUsed()); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - MockParentKernel *parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - MockDeviceQueueHw mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - - size_t heapSize = 20; - - size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); - size_t sshSize = 1000; - IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); - pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); - pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh); - dsh->getSpace(mockDevQueue.getDshOffset()); - - pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize); - - void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase(); - - auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); - auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), - *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - blockedCommandData->setHeaps(dsh, ioh, ssh); - - blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; - PreemptionMode preemptionMode = device->getPreemptionMode(); - std::vector surfaces; - auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); - - cmdComputeKernel->submit(0, false); - - void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase(); - - EXPECT_EQ(sshBuffer, newSshBuffer); - - delete cmdComputeKernel; -} - -HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - - cl_queue_properties properties[3] = {0}; - auto parentKernel = MockParentKernel::create(*context); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - - MockDeviceQueueHw mockDevQueue(context, device, properties[0]); - parentKernel->createReflectionSurface(); - context->setDefaultDeviceQueue(&mockDevQueue); - - auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - - DispatchInfo dispatchInfo(device, parentKernel, 1, workItems, nullptr, globalOffsets); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - MultiDispatchInfo multiDispatchInfo(parentKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData.get(), - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - - EXPECT_NE(nullptr, blockedCommandsData); - EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize()); - EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh); - - EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation()); - EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation()); - EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation()); - EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation()); -} diff --git a/opencl/test/unit_test/fixtures/CMakeLists.txt b/opencl/test/unit_test/fixtures/CMakeLists.txt index 18445e8a80..059c2acd4c 100644 --- a/opencl/test/unit_test/fixtures/CMakeLists.txt +++ b/opencl/test/unit_test/fixtures/CMakeLists.txt @@ -18,7 +18,6 @@ set(IGDRCL_SRCS_tests_fixtures ${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_matcher.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/execution_model_kernel_fixture.h diff --git a/opencl/test/unit_test/fixtures/device_host_queue_fixture.h b/opencl/test/unit_test/fixtures/device_host_queue_fixture.h index 7181cd9b68..77f9d47905 100644 --- a/opencl/test/unit_test/fixtures/device_host_queue_fixture.h +++ b/opencl/test/unit_test/fixtures/device_host_queue_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" -#include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" @@ -54,37 +53,4 @@ class DeviceHostQueueFixture : public ApiFixture<>, cl_queue_properties properties[5] = deviceQueueProperties::noProperties); }; -class DeviceQueueHwTest : public DeviceHostQueueFixture { - public: - using BaseClass = DeviceHostQueueFixture; - void SetUp() override { - BaseClass::SetUp(); - device = castToObject(testedClDevice); - ASSERT_NE(device, nullptr); - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device); - } - - void TearDown() override { - BaseClass::TearDown(); - } - - template - DeviceQueueHw *castToHwType(DeviceQueue *deviceQueue) { - return reinterpret_cast *>(deviceQueue); - } - - template - size_t getMinimumSlbSize() { - return sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + - sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD) + - sizeof(typename GfxFamily::PIPE_CONTROL) + - sizeof(typename GfxFamily::GPGPU_WALKER) + - sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + - sizeof(typename GfxFamily::PIPE_CONTROL) + - DeviceQueueHw::getCSPrefetchSize(); // prefetch size - } - - DeviceQueue *deviceQueue; - ClDevice *device; -}; } // namespace DeviceHostQueue diff --git a/opencl/test/unit_test/fixtures/device_queue_matcher.h b/opencl/test/unit_test/fixtures/device_queue_matcher.h deleted file mode 100644 index 601d4385b9..0000000000 --- a/opencl/test/unit_test/fixtures/device_queue_matcher.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once - -#include "test_traits_common.h" - -struct DeviceEnqueueSupport { - template - static constexpr bool isMatched() { - return TestTraits::get()>::deviceEnqueueSupport; - } -}; \ No newline at end of file diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 51e9e0ee03..8662b1b322 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -36,12 +36,10 @@ #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/fixtures/context_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/program/program_tests.h" @@ -1071,161 +1069,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenK EXPECT_EQ(CL_SUCCESS, retVal); } -HWTEST2_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithDeviceEnqueueIsUsedThenKernelCreateAndSubmitCallbacksAreNotCalled, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice); - - gtpinCallbacks.onContextCreate = OnContextCreate; - gtpinCallbacks.onContextDestroy = OnContextDestroy; - gtpinCallbacks.onKernelCreate = OnKernelCreate; - gtpinCallbacks.onKernelSubmit = OnKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; - retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - - cl_device_id device = (cl_device_id)pDevice; - cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, context); - auto pContext = castToObject(context); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); - - cl_queue_properties devQproperties = 0; - auto devQ = std::make_unique>(pContext, pDevice, devQproperties); - pContext->setDefaultDeviceQueue(devQ.get()); - - cl_command_queue cmdQ = nullptr; - cl_queue_properties properties = 0; - - cmdQ = clCreateCommandQueue(context, device, properties, &retVal); - ASSERT_NE(nullptr, cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - // Prepare a kernel with fake Execution Environment - char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; - size_t binSize = 10; - MockProgram *pProgram = Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), &binary[0], binSize, &retVal); - ASSERT_NE(nullptr, pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - char *pBin = &binary[0]; - SProgramBinaryHeader *pBHdr = (SProgramBinaryHeader *)pBin; - pBHdr->Magic = iOpenCL::MAGIC_CL; - pBHdr->Version = iOpenCL::CURRENT_ICBE_VERSION; - pBHdr->Device = pDevice->getHardwareInfo().platform.eRenderCoreFamily; - pBHdr->GPUPointerSizeInBytes = 8; - pBHdr->NumberOfKernels = 1; - pBHdr->SteppingId = 0; - pBHdr->PatchListSize = 0; - pBin += sizeof(SProgramBinaryHeader); - binSize += sizeof(SProgramBinaryHeader); - - SKernelBinaryHeaderCommon *pKHdr = (SKernelBinaryHeaderCommon *)pBin; - pKHdr->CheckSum = 0; - pKHdr->ShaderHashCode = 0; - pKHdr->KernelNameSize = 4; - pKHdr->PatchListSize = sizeof(SPatchExecutionEnvironment) + sizeof(SPatchBindingTableState); - pKHdr->KernelHeapSize = 16; - pKHdr->GeneralStateHeapSize = 0; - pKHdr->DynamicStateHeapSize = 0; - pKHdr->SurfaceStateHeapSize = 64; - pKHdr->KernelUnpaddedSize = 0; - pBin += sizeof(SKernelBinaryHeaderCommon); - binSize += sizeof(SKernelBinaryHeaderCommon); - char *pKernelBin = pBin; - - strcpy(pBin, "Tst"); - pBin += pKHdr->KernelNameSize; - binSize += pKHdr->KernelNameSize; - - strcpy(pBin, "fake_ISA_code__"); - pBin += pKHdr->KernelHeapSize; - binSize += pKHdr->KernelHeapSize; - - memset(pBin, 0, pKHdr->SurfaceStateHeapSize); - pBin += pKHdr->SurfaceStateHeapSize; - binSize += pKHdr->SurfaceStateHeapSize; - - SPatchExecutionEnvironment *pPatch1 = (SPatchExecutionEnvironment *)pBin; - pPatch1->Token = iOpenCL::PATCH_TOKEN_EXECUTION_ENVIRONMENT; - pPatch1->Size = sizeof(iOpenCL::SPatchExecutionEnvironment); - pPatch1->RequiredWorkGroupSizeX = 0; - pPatch1->RequiredWorkGroupSizeY = 0; - pPatch1->RequiredWorkGroupSizeZ = 0; - pPatch1->LargestCompiledSIMDSize = 8; - pPatch1->CompiledSubGroupsNumber = 0; - pPatch1->HasBarriers = 0; - pPatch1->DisableMidThreadPreemption = 0; - pPatch1->HasDeviceEnqueue = 1; - pPatch1->MayAccessUndeclaredResource = 0; - pPatch1->UsesFencesForReadWriteImages = 0; - pPatch1->UsesStatelessSpillFill = 0; - pPatch1->IsCoherent = 0; - pPatch1->IsInitializer = 0; - pPatch1->IsFinalizer = 0; - pPatch1->SubgroupIndependentForwardProgressRequired = 0; - pPatch1->CompiledForGreaterThan4GBBuffers = 0; - pBin += sizeof(SPatchExecutionEnvironment); - binSize += sizeof(SPatchExecutionEnvironment); - - SPatchBindingTableState *pPatch2 = (SPatchBindingTableState *)pBin; - pPatch2->Token = iOpenCL::PATCH_TOKEN_BINDING_TABLE_STATE; - pPatch2->Size = sizeof(iOpenCL::SPatchBindingTableState); - pPatch2->Offset = 0; - pPatch2->Count = 1; - pPatch2->SurfaceStateOffset = 0; - binSize += sizeof(SPatchBindingTableState); - - uint32_t kernelBinSize = - pKHdr->DynamicStateHeapSize + - pKHdr->GeneralStateHeapSize + - pKHdr->KernelHeapSize + - pKHdr->KernelNameSize + - pKHdr->PatchListSize + - pKHdr->SurfaceStateHeapSize; - uint64_t hashValue = Hash::hash(reinterpret_cast(pKernelBin), kernelBinSize); - pKHdr->CheckSum = static_cast(hashValue & 0xFFFFFFFF); - - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(&binary[0], binSize); - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = binSize; - retVal = pProgram->processGenBinary(*pDevice); - EXPECT_EQ(CL_SUCCESS, retVal); - - // Verify that GT-Pin Kernel Create callback is not called - int prevCount = KernelCreateCallbackCount; - cl_kernel kernel = clCreateKernel(pProgram, "Tst", &retVal); - EXPECT_NE(nullptr, kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount, KernelCreateCallbackCount); - - int prevCount2 = KernelSubmitCallbackCount; - cl_uint workDim = 1; - size_t globalWorkOffset[3] = {0, 0, 0}; - size_t globalWorkSize[3] = {1, 1, 1}; - size_t localWorkSize[3] = {1, 1, 1}; - - MockParentKernel *parentKernel = MockParentKernel::create(*pContext); - auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); - auto pMultiDeviceKernel = std::make_unique(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); - - retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); - - // Cleanup - retVal = clReleaseKernel(kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseCommandQueue(cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseContext(context); - EXPECT_EQ(CL_SUCCESS, retVal); -} - TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; diff --git a/opencl/test/unit_test/kernel/CMakeLists.txt b/opencl/test/unit_test/kernel/CMakeLists.txt index 1a23c59969..f2782d1012 100644 --- a/opencl/test/unit_test/kernel/CMakeLists.txt +++ b/opencl/test/unit_test/kernel/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -21,7 +21,6 @@ set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_is_patched_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_dev_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_reflection_surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_tests.cpp diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 152f291f4b..e6895d9ff3 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,11 +15,9 @@ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_pipe.h" #include "opencl/test/unit_test/mocks/mock_program.h" @@ -424,48 +422,6 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(3, sampler->getRefInternalCount()); } -HWTEST2_F(CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect, DeviceEnqueueSupport) { - pKernelInfo->addArgDevQueue(0, 0x20, sizeof(void *)); - - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device1); - - cl_queue_properties queueProps[5] = { - CL_QUEUE_PROPERTIES, - CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - 0, 0, 0}; - - MockDeviceQueueHw mockDevQueue(context.get(), device1, queueProps[0]); - auto clDeviceQueue = static_cast(&mockDevQueue); - auto rootDeviceIndex = *context->getRootDeviceIndices().begin(); - - pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue); - pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue); - - retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_command_queue), &clDeviceQueue); - ASSERT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); - EXPECT_EQ(Kernel::DEVICE_QUEUE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); - EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); - EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); - EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - - retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); - EXPECT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); - EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - - auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); - EXPECT_EQ(static_cast(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg); -} - TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { char *svmPtr = new char[256]; diff --git a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp deleted file mode 100644 index fb473b0416..0000000000 --- a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/mocks/mock_buffer.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_program.h" - -using namespace NEO; -using namespace DeviceHostQueue; - -struct KernelArgDevQueueTest : public DeviceHostQueueFixture { - protected: - void SetUp() override { - DeviceHostQueueFixture::SetUp(); - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice); - - pDeviceQueue = createQueueObject(); - - pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; - - pKernelInfo->addArgDevQueue(0, crossThreadOffset, 4); - - program = std::make_unique(toClDeviceVector(*pDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - uint8_t pCrossThreadData[crossThreadDataSize]; - memset(pCrossThreadData, crossThreadDataInit, sizeof(pCrossThreadData)); - pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); - } - - void TearDown() override { - delete pKernel; - - delete pDeviceQueue; - - DeviceHostQueueFixture::TearDown(); - } - - bool crossThreadDataUnchanged() { - for (uint32_t i = 0; i < crossThreadDataSize; i++) { - if (pKernel->mockCrossThreadData[i] != crossThreadDataInit) { - return false; - } - } - - return true; - } - - static const uint32_t crossThreadDataSize = 0x10; - static const char crossThreadDataInit = 0x7e; - const CrossThreadDataOffset crossThreadOffset = 0x4; - - std::unique_ptr program; - DeviceQueue *pDeviceQueue = nullptr; - MockKernel *pKernel = nullptr; - std::unique_ptr pKernelInfo; -}; - -HWTEST2_F(KernelArgDevQueueTest, GivenKernelWithDevQueueArgWhenSettingArgHandleThenCorrectHandleIsSet, DeviceEnqueueSupport) { - EXPECT_EQ(pKernel->kernelArgHandlers[0], &Kernel::setArgDevQueue); -} - -HWTEST2_F(KernelArgDevQueueTest, GivenDeviceQueueWhenSettingArgDevQueueThenCorrectlyPatched, DeviceEnqueueSupport) { - auto clDeviceQueue = static_cast(pDeviceQueue); - - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clDeviceQueue); - EXPECT_EQ(ret, CL_SUCCESS); - - auto gpuAddress = static_cast(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch()); - auto patchLocation = ptrOffset(pKernel->mockCrossThreadData.data(), crossThreadOffset); - EXPECT_EQ(*(reinterpret_cast(patchLocation)), gpuAddress); -} - -HWTEST2_F(KernelArgDevQueueTest, GivenCommandQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) { - auto clCmdQueue = static_cast(pCommandQueue); - - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clCmdQueue); - EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); - EXPECT_EQ(crossThreadDataUnchanged(), true); -} - -HWTEST2_F(KernelArgDevQueueTest, GivenNonQueueObjectWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) { - Buffer *buffer = new MockBuffer(); - auto clBuffer = static_cast(buffer); - - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clBuffer); - EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); - EXPECT_EQ(crossThreadDataUnchanged(), true); - - delete buffer; -} - -HWTEST2_F(KernelArgDevQueueTest, GivenInvalidQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) { - char *pFakeDeviceQueue = new char[sizeof(DeviceQueue)]; - auto clFakeDeviceQueue = reinterpret_cast(pFakeDeviceQueue); - - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clFakeDeviceQueue); - EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); - EXPECT_EQ(crossThreadDataUnchanged(), true); - - delete[] pFakeDeviceQueue; -} - -HWTEST2_F(KernelArgDevQueueTest, GivenNullDeviceQueueWhenSettingArgDevQueueThenInvalidArgValueErrorIsReturned, DeviceEnqueueSupport) { - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), nullptr); - EXPECT_EQ(ret, CL_INVALID_ARG_VALUE); - EXPECT_EQ(crossThreadDataUnchanged(), true); -} - -HWTEST2_F(KernelArgDevQueueTest, GivenInvalidSizeWhenSettingArgDevQueueThenInvalidArgSizeErrorIsReturned, DeviceEnqueueSupport) { - auto clDeviceQueue = static_cast(pDeviceQueue); - - auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue) - 1, &clDeviceQueue); - EXPECT_EQ(ret, CL_INVALID_ARG_SIZE); - EXPECT_EQ(crossThreadDataUnchanged(), true); -} diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 7742e2fffb..bde856e69d 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -19,7 +19,6 @@ #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 55c3fe2d60..89517bdb6c 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -34,7 +34,6 @@ #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" @@ -1013,264 +1012,6 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon delete pKernel; } -HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - - pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0, 0); - - // create kernel - MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - // setup surface state heap - char surfaceStateHeap[0x80]; - pKernelInfo->heapInfo.pSsh = surfaceStateHeap; - pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); - - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), - pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - - EXPECT_EQ(0u, surfaceAddress); - auto surfaceType = surfaceState->getSurfaceType(); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); - - delete pKernel; -} - -HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - - pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0, 0); - - // create kernel - MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - // setup surface state heap - char surfaceStateHeap[0x80]; - pKernelInfo->heapInfo.pSsh = surfaceStateHeap; - pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - pKernel->patchEventPool(pDevQueue); - - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), - pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), surfaceAddress); - auto surfaceType = surfaceState->getSurfaceType(); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); - - delete pKernel; -} - -HWTEST2_F(KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) { - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - uint64_t crossThreadData = 123; - - pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); - - pKernel->patchEventPool(pDevQueue); - - EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); - - delete pKernel; -} - -HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched, DeviceEnqueueSupport) { - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0); - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - if (pClDevice->areOcl21FeaturesSupported() == false) { - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - } else { - } - - delete pKernel; -} - -HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) { - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0); - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - uint64_t crossThreadData = 0; - - pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); - - pKernel->patchEventPool(pDevQueue); - - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); - - delete pKernel; -} - -HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0, 0); - - // create kernel - MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - // setup surface state heap - char surfaceStateHeap[0x80]; - pKernelInfo->heapInfo.pSsh = surfaceStateHeap; - pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); - - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), - pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - - EXPECT_EQ(0u, surfaceAddress); - auto surfaceType = surfaceState->getSurfaceType(); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType); - - delete pKernel; -} - -HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0, 0); - - // create kernel - MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - // setup surface state heap - char surfaceStateHeap[0x80]; - pKernelInfo->heapInfo.pSsh = surfaceStateHeap; - pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - pKernel->patchDefaultDeviceQueue(pDevQueue); - - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); - - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), - pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), surfaceAddress); - auto surfaceType = surfaceState->getSurfaceType(); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType); - - delete pKernel; -} - -HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0); - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - - delete pKernel; -} - -HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - uint64_t crossThreadData = 123; - - pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); - - pKernel->patchDefaultDeviceQueue(pDevQueue); - - EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); - - delete pKernel; -} - -HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) { - - // define kernel info - auto pKernelInfo = std::make_unique(); - pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0); - - // create kernel - MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); - - uint64_t crossThreadData = 0; - - pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t)); - - pKernel->patchDefaultDeviceQueue(pDevQueue); - - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); - - delete pKernel; -} - typedef Test KernelResidencyTest; HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIsMadeResident) { @@ -3052,64 +2793,6 @@ TEST(KernelTest, givenKernelWhenSettingAdditinalKernelExecInfoThenCorrectValueIs EXPECT_EQ(AdditionalKernelExecInfo::NotApplicable, mockKernel.getAdditionalKernelExecInfo()); } -namespace NEO { - -template -class DeviceQueueHwMock : public DeviceQueueHw { - using BaseClass = DeviceQueueHw; - - public: - using BaseClass::buildSlbDummyCommands; - using BaseClass::getCSPrefetchSize; - using BaseClass::getExecutionModelCleanupSectionSize; - using BaseClass::getMediaStateClearCmdsSize; - using BaseClass::getMinimumSlbSize; - using BaseClass::getProfilingEndCmdsSize; - using BaseClass::getSlbCS; - using BaseClass::getWaCommandsSize; - using BaseClass::offsetDsh; - - DeviceQueueHwMock(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) { - auto slb = this->getSlbBuffer(); - LinearStream *slbCS = getSlbCS(); - slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset - }; -}; -} // namespace NEO - -HWTEST2_F(DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue, DeviceEnqueueSupport) { - std::unique_ptr> mockDeviceQueueHw(new DeviceQueueHwMock(pContext, device, deviceQueueProperties::minimumProperties[0])); - - auto slb = mockDeviceQueueHw->getSlbBuffer(); - auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); - auto slbCopy = malloc(slb->getUnderlyingBufferSize()); - memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize()); - memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); - - auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - - // slbEndOffset < commandsSize * 128 - // always fill only 1 enqueue (after offset) - auto offset = static_cast(commandsSize) * 50; - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset; - mockDeviceQueueHw->resetDeviceQueue(); - EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset - EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset), - slbCopy, commandsSize)); // change 1 enqueue - EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize), - slbCopy, offset)); // dont touch memory after (offset + 1 enqueue) - - // slbEndOffset == commandsSize * 128 - // dont fill commands - memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize()); - offset = static_cast(commandsSize) * 128; - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast(commandsSize); - mockDeviceQueueHw->resetDeviceQueue(); - EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues - - free(slbCopy); -} - using KernelMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThenPrivateSurfacesHaveCorrectRootDeviceIndex) { diff --git a/opencl/test/unit_test/mocks/CMakeLists.txt b/opencl/test/unit_test/mocks/CMakeLists.txt index 9734b0c947..c5560b6ea9 100644 --- a/opencl/test/unit_test/mocks/CMakeLists.txt +++ b/opencl/test/unit_test/mocks/CMakeLists.txt @@ -16,7 +16,6 @@ set(IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h - ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_image.h diff --git a/opencl/test/unit_test/mocks/mock_device_queue.h b/opencl/test/unit_test/mocks/mock_device_queue.h deleted file mode 100644 index f02dbbb33a..0000000000 --- a/opencl/test/unit_test/mocks/mock_device_queue.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (C) 2018-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "shared/source/command_container/command_encoder.h" - -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/source/device_queue/device_queue_hw.h" -#include "opencl/source/helpers/hardware_commands_helper.h" - -namespace NEO { -template -class MockDeviceQueueHw : public DeviceQueueHw { - using BaseClass = DeviceQueueHw; - using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; - using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; - using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; - using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; - - public: - using BaseClass::addArbCheckCmdWa; - using BaseClass::addLriCmd; - using BaseClass::addLriCmdWa; - using BaseClass::addMediaStateClearCmds; - using BaseClass::addMiAtomicCmdWa; - using BaseClass::addPipeControlCmdWa; - using BaseClass::addProfilingEndCmds; - using BaseClass::buildSlbDummyCommands; - using BaseClass::getBlockKernelStartPointer; - using BaseClass::getCSPrefetchSize; - using BaseClass::getExecutionModelCleanupSectionSize; - using BaseClass::getMediaStateClearCmdsSize; - using BaseClass::getMinimumSlbSize; - using BaseClass::getProfilingEndCmdsSize; - using BaseClass::getSlbCS; - using BaseClass::getWaCommandsSize; - using BaseClass::offsetDsh; - - bool arbCheckWa; - bool miAtomicWa; - bool lriWa; - bool pipeControlWa; - - struct ExpectedCmds { - MEDIA_STATE_FLUSH mediaStateFlush; - MI_ARB_CHECK arbCheck; - MI_ATOMIC miAtomic; - MEDIA_INTERFACE_DESCRIPTOR_LOAD mediaIdLoad; - MI_LOAD_REGISTER_IMM lriTrue; - MI_LOAD_REGISTER_IMM lriFalse; - PIPE_CONTROL pipeControl; - PIPE_CONTROL noopedPipeControl; - GPGPU_WALKER gpgpuWalker; - uint8_t *prefetch; - MI_BATCH_BUFFER_START bbStart; - } expectedCmds; - - MockDeviceQueueHw(Context *context, - ClDevice *device, - cl_queue_properties &properties) : BaseClass(context, device, properties) { - auto slb = this->getSlbBuffer(); - LinearStream *slbCS = getSlbCS(); - slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); - size_t size = slbCS->getUsed(); - - lriWa = false; - addLriCmdWa(true); - if (slbCS->getUsed() > size) { - size = slbCS->getUsed(); - lriWa = true; - } - pipeControlWa = false; - addPipeControlCmdWa(); - if (slbCS->getUsed() > size) { - size = slbCS->getUsed(); - pipeControlWa = true; - } - arbCheckWa = false; - addArbCheckCmdWa(); - if (slbCS->getUsed() > size) { - size = slbCS->getUsed(); - arbCheckWa = true; - } - miAtomicWa = false; - addMiAtomicCmdWa(0); - if (slbCS->getUsed() > size) { - size = slbCS->getUsed(); - miAtomicWa = true; - } - slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset - - setupExpectedCmds(); - }; - - ~MockDeviceQueueHw() override { - if (expectedCmds.prefetch) - delete expectedCmds.prefetch; - } - - MI_ATOMIC getExpectedMiAtomicCmd() { - auto igilCmdQueue = reinterpret_cast(this->queueBuffer->getUnderlyingBuffer()); - auto placeholder = (uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder; - - MI_ATOMIC miAtomic = GfxFamily::cmdInitAtomic; - EncodeAtomic::programMiAtomic(&miAtomic, - placeholder, - MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, - MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, - 0x1u, 0x1u, 0x0u, 0x0u); - return miAtomic; - } - - MI_LOAD_REGISTER_IMM getExpectedLriCmd(bool arbCheck) { - MI_LOAD_REGISTER_IMM lri = GfxFamily::cmdInitLoadRegisterImm; - lri.setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset - if (arbCheck) - lri.setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only) - else - lri.setDataDword(0x0); // default value - - return lri; - } - - PIPE_CONTROL getExpectedPipeControlCmd() { - PIPE_CONTROL pc; - this->initPipeControl(&pc); - return pc; - } - - MI_ARB_CHECK getExpectedArbCheckCmd() { - return GfxFamily::cmdInitArbCheck; - } - - void setupExpectedCmds() { - expectedCmds.mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; - expectedCmds.arbCheck = getExpectedArbCheckCmd(); - expectedCmds.miAtomic = getExpectedMiAtomicCmd(); - expectedCmds.mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; - expectedCmds.mediaIdLoad.setInterfaceDescriptorTotalLength(2048); - - auto dataStartAddress = DeviceQueue::colorCalcStateSize; - - // add shift to second table ( 62 index of first ID table with scheduler ) - dataStartAddress += sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::schedulerIDIndex; - - expectedCmds.mediaIdLoad.setInterfaceDescriptorDataStartAddress(dataStartAddress); - expectedCmds.lriTrue = getExpectedLriCmd(true); - expectedCmds.lriFalse = getExpectedLriCmd(false); - expectedCmds.pipeControl = getExpectedPipeControlCmd(); - memset(&expectedCmds.noopedPipeControl, 0x0, sizeof(PIPE_CONTROL)); - expectedCmds.gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; - expectedCmds.gpgpuWalker.setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); - expectedCmds.gpgpuWalker.setThreadGroupIdXDimension(1); - expectedCmds.gpgpuWalker.setThreadGroupIdYDimension(1); - expectedCmds.gpgpuWalker.setThreadGroupIdZDimension(1); - expectedCmds.gpgpuWalker.setRightExecutionMask(0xFFFFFFFF); - expectedCmds.gpgpuWalker.setBottomExecutionMask(0xFFFFFFFF); - expectedCmds.prefetch = new uint8_t[DeviceQueueHw::getCSPrefetchSize()]; - memset(expectedCmds.prefetch, 0x0, DeviceQueueHw::getCSPrefetchSize()); - expectedCmds.bbStart = GfxFamily::cmdInitBatchBufferStart; - auto slbPtr = reinterpret_cast(this->getSlbBuffer()->getUnderlyingBuffer()); - expectedCmds.bbStart.setBatchBufferStartAddress(slbPtr); - } - - IGIL_CommandQueue *getIgilQueue() { - auto igilCmdQueue = reinterpret_cast(DeviceQueue::queueBuffer->getUnderlyingBuffer()); - return igilCmdQueue; - } -}; -} // namespace NEO diff --git a/opencl/test/unit_test/mt_tests/device_queue/CMakeLists.txt b/opencl/test/unit_test/mt_tests/device_queue/CMakeLists.txt deleted file mode 100644 index 2ab2e67950..0000000000 --- a/opencl/test/unit_test/mt_tests/device_queue/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (C) 2018-2021 Intel Corporation -# -# SPDX-License-Identifier: MIT -# - -set(IGDRCL_SRCS_mt_tests_device_queue - # local files - ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_mt_tests.cpp -) -target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_device_queue}) diff --git a/opencl/test/unit_test/mt_tests/device_queue/device_queue_mt_tests.cpp b/opencl/test/unit_test/mt_tests/device_queue/device_queue_mt_tests.cpp deleted file mode 100644 index f8b2e43ad7..0000000000 --- a/opencl/test/unit_test/mt_tests/device_queue/device_queue_mt_tests.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/test/common/mocks/mock_device.h" -#include "shared/test/common/test_macros/test.h" - -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/mocks/mock_cl_device.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" -#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" - -using namespace NEO; - -typedef ::testing::Test DeviceQueueHwMtTest; - -HWTEST2_F(DeviceQueueHwMtTest, givenTakenIgilCriticalSectionWhenSecondThreadIsWaitingThenDontHang, DeviceEnqueueSupport) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); - auto context = std::unique_ptr(new MockContext()); - - cl_queue_properties properties[3] = {0}; - MockDeviceQueueHw mockDevQueue(context.get(), device.get(), properties[0]); - - auto igilCmdQueue = mockDevQueue.getIgilQueue(); - auto igilCriticalSection = const_cast(&igilCmdQueue->m_controls.m_CriticalSection); - *igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Taken; - EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); - - std::mutex mtx; - - auto thread = std::thread([&] { - std::unique_lock inThreadLock(mtx); - while (!mockDevQueue.isEMCriticalSectionFree()) { - inThreadLock.unlock(); - inThreadLock.lock(); - } - }); - - std::unique_lock lock(mtx); - *igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Free; - lock.unlock(); - - thread.join(); - EXPECT_TRUE(mockDevQueue.isEMCriticalSectionFree()); -} diff --git a/opencl/test/unit_test/scheduler/CMakeLists.txt b/opencl/test/unit_test/scheduler/CMakeLists.txt index 846968d63e..2fafcb46de 100644 --- a/opencl/test/unit_test/scheduler/CMakeLists.txt +++ b/opencl/test/unit_test/scheduler/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -7,8 +7,5 @@ set(IGDRCL_SRCS_tests_scheduler ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.h - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.inl ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scheduler}) diff --git a/opencl/test/unit_test/scheduler/scheduler_source_tests.cpp b/opencl/test/unit_test/scheduler/scheduler_source_tests.cpp deleted file mode 100644 index 6e78481b18..0000000000 --- a/opencl/test/unit_test/scheduler/scheduler_source_tests.cpp +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" - -#include "shared/test/common/cmd_parse/hw_parse.h" -#include "shared/test/common/test_macros/test.h" - -#include "opencl/source/device_queue/device_queue_hw.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/device_queue_matcher.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" - -#include "gtest/gtest.h" -#include "hw_cmds.h" -// Keep this include after execution_model_fixture.h otherwise there is high chance of conflict with macros -#include "opencl/source/builtin_kernels_simulation/opencl_c.h" -#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" - -using namespace NEO; -using namespace BuiltinKernelsSimulation; - -HWTEST2_F(SchedulerSourceTest, WhenEnqueingThenGpgpuWalkerIsPatchedCorrectly, DeviceEnqueueSupport) { - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; - using MI_ATOMIC = typename FamilyType::MI_ATOMIC; - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - - size_t msfOffset = 0; - size_t miArbCheckOffset = 0; - size_t miAtomicOffset = 0; - size_t mediaIDLoadOffset = 0; - size_t miLoadRegOffset = 0; - size_t pipeControlOffset = 0; - size_t gpgpuOffset = 0; - size_t msfOffset2 = 0; - size_t miArbCheckOffset2 = 0; - - size_t msfOffsetAfter = 0; - size_t miArbCheckOffsetAfter = 0; - size_t miAtomicOffsetAfter = 0; - size_t mediaIDLoadOffsetAfter = 0; - size_t miLoadRegOffsetAfter = 0; - size_t pipeControlOffsetAfter = 0; - size_t gpgpuOffsetAfter = 0; - size_t msfOffsetAfter2 = 0; - size_t miArbCheckOffsetAfter2 = 0; - - auto pDevQueueHw = new MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); - - // Prepopulate SLB with commands - pDevQueueHw->buildSlbDummyCommands(); - LinearStream *slb = pDevQueueHw->getSlbCS(); - HardwareParse hwParser; - hwParser.parseCommands(*slb, 0); - - // Parse commands and save offsets of first enqueue space - auto itorMediaStateFlush = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush; - - EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf); - - auto itorArbCheck = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr; - - auto itorMiAtomic = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr; - - auto itorIDLoad = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr; - - auto itorMiLoadReg = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr; - - auto itorPipeControl = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr; - - auto itorWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - auto *walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr; - - auto itorMediaStateFlush2 = find(itorWalker, hwParser.cmdList.end()); - auto *msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr; - - auto itorArbCheck2 = find(itorWalker, hwParser.cmdList.end()); - auto *arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr; - - if (msf) - msfOffset = ptrDiff(msf, slb->getCpuBase()); - - if (arbCheck) - miArbCheckOffset = ptrDiff(arbCheck, slb->getCpuBase()); - - if (miAtomic) - miAtomicOffset = ptrDiff(miAtomic, slb->getCpuBase()); - - if (idLoad) - mediaIDLoadOffset = ptrDiff(idLoad, slb->getCpuBase()); - - if (miLoadReg) - miLoadRegOffset = ptrDiff(miLoadReg, slb->getCpuBase()); - - if (pipeControl) - pipeControlOffset = ptrDiff(pipeControl, slb->getCpuBase()); - - if (walker) - gpgpuOffset = ptrDiff(walker, slb->getCpuBase()); - - if (msf2) - msfOffset2 = ptrDiff(msf2, slb->getCpuBase()); - - if (arbCheck2) - miArbCheckOffset2 = ptrDiff(arbCheck2, slb->getCpuBase()); - - uint32_t *slbBuffer = (uint32_t *)slb->getCpuBase(); - uint32_t secondLevelBatchOffset = 0; - uint32_t InterfaceDescriptorOffset = 3; - uint32_t SIMDSize = 16; - uint32_t TotalLocalWorkSize = 24; - uint3 DimSize = {6, 4, 1}; - uint3 StartPoint = {4, 4, 0}; - uint32_t NumberOfHWThreadsPerWG = 3; - uint32_t IndirectPayloadSize = 10; - uint32_t IOHoffset = 256; - - SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, slbBuffer, InterfaceDescriptorOffset, SIMDSize, TotalLocalWorkSize, DimSize, StartPoint, NumberOfHWThreadsPerWG, IndirectPayloadSize, IOHoffset); - - size_t commandsSize = pDevQueueHw->getMinimumSlbSize() + pDevQueueHw->getWaCommandsSize(); - - // Parse again - LinearStream slbTested(slbBuffer, commandsSize); - hwParser.cmdList.clear(); - slbTested.getSpace(commandsSize); - hwParser.parseCommands(slbTested, 0); - - itorMediaStateFlush = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush; - - EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf); - - itorArbCheck = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr; - - itorMiAtomic = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr; - - itorIDLoad = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr; - - itorMiLoadReg = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr; - - itorPipeControl = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr; - - itorWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr; - - itorMediaStateFlush2 = find(itorWalker, hwParser.cmdList.end()); - msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr; - - itorArbCheck2 = find(itorWalker, hwParser.cmdList.end()); - arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr; - - if (msf) - msfOffsetAfter = ptrDiff(msf, slbTested.getCpuBase()); - - if (arbCheck) - miArbCheckOffsetAfter = ptrDiff(arbCheck, slbTested.getCpuBase()); - - if (miAtomic) - miAtomicOffsetAfter = ptrDiff(miAtomic, slbTested.getCpuBase()); - - if (idLoad) - mediaIDLoadOffsetAfter = ptrDiff(idLoad, slbTested.getCpuBase()); - - if (miLoadReg) - miLoadRegOffsetAfter = ptrDiff(miLoadReg, slbTested.getCpuBase()); - - if (pipeControl) - pipeControlOffsetAfter = ptrDiff(pipeControl, slbTested.getCpuBase()); - - if (walker) - gpgpuOffsetAfter = ptrDiff(walker, slbTested.getCpuBase()); - - if (msf2) - msfOffsetAfter2 = ptrDiff(msf2, slbTested.getCpuBase()); - - if (arbCheck2) - miArbCheckOffsetAfter2 = ptrDiff(arbCheck2, slbTested.getCpuBase()); - - EXPECT_EQ(msfOffset, msfOffsetAfter); - EXPECT_EQ(miArbCheckOffset, miArbCheckOffsetAfter); - EXPECT_EQ(miAtomicOffset, miAtomicOffsetAfter); - EXPECT_EQ(mediaIDLoadOffset, mediaIDLoadOffsetAfter); - EXPECT_EQ(miLoadRegOffset, miLoadRegOffsetAfter); - EXPECT_EQ(pipeControlOffset, pipeControlOffsetAfter); - EXPECT_EQ(gpgpuOffset, gpgpuOffsetAfter); - EXPECT_EQ(msfOffset2, msfOffsetAfter2); - EXPECT_EQ(miArbCheckOffset2, miArbCheckOffsetAfter2); - - if (walker) { - EXPECT_EQ(InterfaceDescriptorOffset, walker->getInterfaceDescriptorOffset()); - EXPECT_EQ(NumberOfHWThreadsPerWG, walker->getThreadWidthCounterMaximum()); - - EXPECT_EQ(16u, SIMDSize); - typename GPGPU_WALKER::SIMD_SIZE simd = GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16; - EXPECT_EQ(simd, walker->getSimdSize()); - - EXPECT_EQ(StartPoint.x, walker->getThreadGroupIdStartingX()); - EXPECT_EQ(StartPoint.y, walker->getThreadGroupIdStartingY()); - //EXPECT_EQ(StartPoint.z, walker->GetThreadGroupIdStartingZ()); - - EXPECT_EQ(DimSize.x, walker->getThreadGroupIdXDimension()); - EXPECT_EQ(DimSize.y, walker->getThreadGroupIdYDimension()); - //EXPECT_EQ(DimSize.z, walker->getThreadGroupIdZDimension()); - - uint32_t mask = static_cast(maxNBitValue(TotalLocalWorkSize % SIMDSize)); - if (mask == 0) - mask = ~0; - uint32_t yMask = 0xffffffff; - - EXPECT_EQ(mask, walker->getRightExecutionMask()); - EXPECT_EQ(yMask, walker->getBottomExecutionMask()); - - EXPECT_EQ(IndirectPayloadSize, walker->getIndirectDataLength()); - - EXPECT_EQ(IOHoffset, walker->getIndirectDataStartAddress()); - } else { - EXPECT_TRUE(false) << "GPGPU_WALKER commandnot found, patchGpGpuWalker could have corrupted prepopulated commands\n"; - } - - delete pDevQueueHw; -} diff --git a/opencl/test/unit_test/scheduler/scheduler_source_tests.h b/opencl/test/unit_test/scheduler/scheduler_source_tests.h deleted file mode 100644 index 866655bb27..0000000000 --- a/opencl/test/unit_test/scheduler/scheduler_source_tests.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/test/common/test_macros/test.h" - -#include "opencl/test/unit_test/mocks/mock_cl_device.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" - -#include "gtest/gtest.h" - -namespace NEO { - -class SchedulerSourceTest : public testing::Test { - public: - void SetUp() override { - pDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice); - } - void TearDown() override { - delete pDevice; - } - - MockClDevice *pDevice; - MockContext context; - - template - void givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); - template - void givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); - template - void givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); -}; - -} // namespace NEO diff --git a/opencl/test/unit_test/scheduler/scheduler_source_tests.inl b/opencl/test/unit_test/scheduler/scheduler_source_tests.inl deleted file mode 100644 index 49ae879232..0000000000 --- a/opencl/test/unit_test/scheduler/scheduler_source_tests.inl +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2018-2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/mocks/mock_device_queue.h" - -#include - -template -void SchedulerSourceTest::givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest() { - auto devQueueHw = std::unique_ptr>(new MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0])); - - auto singleEnqueueSpace = devQueueHw->getMinimumSlbSize() + devQueueHw->getWaCommandsSize(); - EXPECT_EQ(singleEnqueueSpace, SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE); -} - -template -void SchedulerSourceTest::givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest() { - auto devQueueHw = std::unique_ptr>(new MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0])); - devQueueHw->buildSlbDummyCommands(); - - auto slbCS = devQueueHw->getSlbCS(); - auto usedSpace = slbCS->getUsed(); - - auto spaceRequiredForEnqueuesAndBBStart = SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); - EXPECT_EQ(usedSpace, spaceRequiredForEnqueuesAndBBStart); -} - -template -void SchedulerSourceTest::givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest() { - EXPECT_EQ(DeviceQueue::numberOfDeviceEnqueues, static_cast(SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES)); -}