mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Remove device enqueue part 1
Remove: -tests with matcher for device enqueue -classes: MockDeviceQueueHw, DeviceQueueHw, SchedulerSimulation, DeviceQueueHwTest, KernelArgDevQueueTest -functions: forceDispatchScheduler, processDeviceEnqueue, dispatchScheduler Related-To: NEO-6559 Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
61e5e0687d
commit
a9ebb49fb5
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -8,9 +8,6 @@ set(BUILTIN_KERNELS_SIMULATION_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h
|
||||
)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
extern bool conditionReady;
|
||||
extern std::thread threads[];
|
||||
|
||||
template <typename GfxFamily>
|
||||
class SchedulerSimulation {
|
||||
public:
|
||||
void runSchedulerSimulation(NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
void cleanSchedulerSimulation();
|
||||
|
||||
static void startScheduler(uint32_t index,
|
||||
NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
static void patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset);
|
||||
static bool enabled;
|
||||
static bool simulationRun;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::enabled = true;
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::simulationRun = false;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
|
||||
threadIDToLocalIDmap.clear();
|
||||
delete pGlobalBarrier;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
localSize[0] = NUM_OF_THREADS;
|
||||
localSize[1] = 1;
|
||||
localSize[2] = 1;
|
||||
|
||||
threadIDToLocalIDmap.clear();
|
||||
pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
|
||||
|
||||
// Spawn Thread ID == 0 on main thread
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
|
||||
threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
|
||||
}
|
||||
|
||||
conditionReady = true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
simulationRun = true;
|
||||
if (enabled) {
|
||||
initializeSchedulerSimulation(queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// start main thread with LID == 0
|
||||
startScheduler(0,
|
||||
queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// Wait for all threads on main thread
|
||||
if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
|
||||
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++)
|
||||
threads[i].join();
|
||||
|
||||
cleanSchedulerSimulation();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -13,8 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
|
||||
@ -43,8 +41,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -15,7 +15,6 @@
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/queue_helpers.h"
|
||||
#include "opencl/source/mem_obj/mem_obj.h"
|
||||
@ -502,8 +501,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
|
||||
void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
|
||||
void runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel);
|
||||
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
const size_t *bufferOrigin,
|
||||
@ -513,10 +510,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch);
|
||||
void processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
bool &blocking);
|
||||
|
||||
template <uint32_t commandType>
|
||||
void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
|
||||
@ -524,7 +517,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
Event *event,
|
||||
TagNodeBase *&hwTimeStamps,
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketDependencies ×tampPacketDependencies);
|
||||
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
|
||||
}
|
||||
} // namespace NEO
|
@ -1,25 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
|
||||
simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(),
|
||||
devQueueHw.getStackBuffer(),
|
||||
devQueueHw.getEventPoolBuffer(),
|
||||
devQueueHw.getSlbBuffer(),
|
||||
devQueueHw.getDshBuffer(),
|
||||
parentKernel.getKernelReflectionSurface(),
|
||||
devQueueHw.getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
||||
devQueueHw.getDebugQueue());
|
||||
}
|
||||
} // namespace NEO
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -9,7 +9,6 @@
|
||||
|
||||
#include "opencl/extensions/public/cl_ext_private.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_base.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/memory_manager/resource_surface.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -22,7 +22,6 @@
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
@ -67,45 +66,40 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
|
||||
auto auxTranslationMode = AuxTranslationMode::None;
|
||||
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
kernel->updateAuxTranslationRequired();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
|
||||
if (!kernelObjsForAuxTranslation.empty()) {
|
||||
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
||||
builtInLock.takeOwnership(builder, this->context);
|
||||
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
||||
|
||||
kernel->updateAuxTranslationRequired();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
|
||||
if (!kernelObjsForAuxTranslation.empty()) {
|
||||
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
|
||||
}
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
||||
builtInLock.takeOwnership(builder, this->context);
|
||||
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
||||
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
UNRECOVERABLE_IF(kernel->isParentKernel);
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
|
||||
}
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
UNRECOVERABLE_IF(kernel->isParentKernel);
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Blit == auxTranslationMode) {
|
||||
@ -115,35 +109,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
|
||||
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObjectOrAbort<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
DispatchInfo dispatchInfo(devQueue->getClDevice(), &scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
|
||||
Vec3<size_t> workGroupCount = generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize());
|
||||
dispatchInfo.setTotalNumberOfWorkgroups(workGroupCount);
|
||||
dispatchInfo.setNumberOfWorkgroups(workGroupCount);
|
||||
|
||||
scheduler.createReflectionSurface();
|
||||
GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface();
|
||||
|
||||
devQueueHw->resetDeviceQueue();
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
reflectionSurface,
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation());
|
||||
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t commandType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
@ -163,8 +128,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
TagNodeBase *hwTimeStamps = nullptr;
|
||||
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
||||
@ -183,11 +146,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
|
||||
|
||||
if (parentKernel && !blockQueue) {
|
||||
while (!devQueueHw->isEMCriticalSectionFree())
|
||||
;
|
||||
}
|
||||
|
||||
enqueueHandlerHook(commandType, multiDispatchInfo);
|
||||
|
||||
bool clearDependenciesForSubCapture = false;
|
||||
@ -262,7 +220,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
|
||||
hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(),
|
||||
timestampPacketDependencies);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
||||
@ -310,9 +268,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
if (!blockQueue) {
|
||||
if (parentKernel) {
|
||||
processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
|
||||
}
|
||||
|
||||
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
|
||||
csrDeps.makeResident(computeCommandStreamReceiver);
|
||||
@ -332,15 +287,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
taskLevel,
|
||||
printfHandler.get(),
|
||||
getBcsForAuxTranslation());
|
||||
|
||||
if (parentKernel) {
|
||||
computeCommandStreamReceiver.setMediaVFEStateDirty(true);
|
||||
|
||||
if (devQueueHw->getSchedulerReturnInstance() > 0) {
|
||||
waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
|
||||
}
|
||||
}
|
||||
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
|
||||
completionStamp = enqueueCommandWithoutKernel(
|
||||
surfacesForResidency,
|
||||
@ -438,7 +384,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
Event *event,
|
||||
TagNodeBase *&hwTimeStamps,
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketDependencies ×tampPacketDependencies) {
|
||||
@ -474,10 +419,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
|
||||
if (!blockQueue) {
|
||||
devQueueHw->resetDeviceQueue();
|
||||
devQueueHw->acquireEMCriticalSection();
|
||||
}
|
||||
}
|
||||
|
||||
if (event && this->isPerfCountersEnabled()) {
|
||||
@ -651,56 +592,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
bool &blocking) {
|
||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
|
||||
|
||||
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
|
||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
parentKernel,
|
||||
(uint32_t)multiDispatchInfo.size(),
|
||||
getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
|
||||
taskCount,
|
||||
hwTimeStamps,
|
||||
isCcsUsed);
|
||||
|
||||
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
parentKernel->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
|
||||
auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
*this->commandStream,
|
||||
*devQueueHw,
|
||||
preemptionMode,
|
||||
scheduler,
|
||||
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(getGpgpuCommandStreamReceiver());
|
||||
|
||||
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver());
|
||||
|
||||
if (parentKernel->isAuxTranslationRequired()) {
|
||||
blocking = true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
|
||||
auto isQueueBlockedStatus = isQueueBlocked();
|
||||
@ -842,17 +733,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
IndirectHeap *dsh = nullptr;
|
||||
IndirectHeap *ioh = nullptr;
|
||||
|
||||
if (multiDispatchInfo.peekParentKernel()) {
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
|
||||
ioh = dsh;
|
||||
implicitFlush = true;
|
||||
} else {
|
||||
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
}
|
||||
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
auto allocNeedsFlushDC = false;
|
||||
if (!device->isFullRangeSvm()) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -20,7 +20,6 @@
|
||||
#include "opencl/source/command_queue/cl_local_work_size.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/task_information.h"
|
||||
@ -84,15 +83,6 @@ class GpgpuWalkerHelper {
|
||||
TagNodeBase *timestampPacketNode,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
static void dispatchScheduler(
|
||||
LinearStream &commandStream,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed);
|
||||
|
||||
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
||||
|
||||
private:
|
||||
@ -142,11 +132,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
|
||||
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
|
||||
if (heapType == IndirectHeap::SURFACE_STATE) {
|
||||
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
|
||||
{
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -191,17 +191,12 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
return expectedSizeCS;
|
||||
}
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
|
||||
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
}
|
||||
if (parentKernel) {
|
||||
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{});
|
||||
}
|
||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
||||
|
@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
LinearStream &commandStream,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
} // namespace NEO
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -16,7 +16,6 @@
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_base.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -8,10 +8,6 @@ set(RUNTIME_SRCS_DEVICE_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE})
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -12,7 +12,6 @@
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/queue_helpers.h"
|
||||
|
||||
@ -186,10 +185,6 @@ void DeviceQueue::resetDeviceQueue() {
|
||||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
|
||||
return;
|
||||
}
|
||||
|
||||
IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -83,7 +83,6 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
||||
}
|
||||
|
||||
virtual void resetDeviceQueue();
|
||||
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
|
||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||
|
||||
void acquireEMCriticalSection() {
|
||||
|
@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class DeviceQueueHw : public DeviceQueue {
|
||||
using BaseClass = DeviceQueue;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
|
||||
public:
|
||||
DeviceQueueHw(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) : BaseClass(context, device, properties) {
|
||||
allocateSlbBuffer();
|
||||
offsetDsh = colorCalcStateSize + (uint32_t)sizeof(INTERFACE_DESCRIPTOR_DATA) * interfaceDescriptorEntries * numberOfIDTables;
|
||||
igilQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
}
|
||||
|
||||
static DeviceQueue *create(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) {
|
||||
return new (std::nothrow) DeviceQueueHw<GfxFamily>(context, device, properties);
|
||||
}
|
||||
|
||||
IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override;
|
||||
|
||||
LinearStream *getSlbCS() { return &slbCS; }
|
||||
void resetDSH();
|
||||
|
||||
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;
|
||||
|
||||
uint32_t getSchedulerReturnInstance() {
|
||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||
}
|
||||
|
||||
static size_t getCSPrefetchSize();
|
||||
|
||||
protected:
|
||||
void allocateSlbBuffer();
|
||||
size_t getMinimumSlbSize();
|
||||
size_t getWaCommandsSize();
|
||||
void addArbCheckCmdWa();
|
||||
void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder);
|
||||
void addLriCmdWa(bool setArbCheck);
|
||||
void addLriCmd(bool setArbCheck);
|
||||
void addPipeControlCmdWa(bool isNoopCmd = false);
|
||||
void initPipeControl(PIPE_CONTROL *pc);
|
||||
void buildSlbDummyCommands();
|
||||
void addDcFlushToPipeControlWa(PIPE_CONTROL *pc);
|
||||
|
||||
void addProfilingEndCmds(uint64_t timestampAddress);
|
||||
static size_t getProfilingEndCmdsSize();
|
||||
|
||||
MOCKABLE_VIRTUAL void addMediaStateClearCmds();
|
||||
static size_t getMediaStateClearCmdsSize();
|
||||
|
||||
static size_t getExecutionModelCleanupSectionSize();
|
||||
static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);
|
||||
|
||||
LinearStream slbCS;
|
||||
IGIL_CommandQueue *igilQueue = nullptr;
|
||||
};
|
||||
} // namespace NEO
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addProfilingEndCmds(uint64_t timestampAddress) {
|
||||
|
||||
auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL));
|
||||
auto pipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pipeControlCmd.setCommandStreamerStallEnable(true);
|
||||
*pipeControlSpace = pipeControlCmd;
|
||||
|
||||
//low part
|
||||
auto mICmdLowSpace = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
auto mICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
||||
GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(&mICmdLow);
|
||||
mICmdLow.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
mICmdLow.setMemoryAddress(timestampAddress);
|
||||
*mICmdLowSpace = mICmdLow;
|
||||
}
|
||||
} // namespace NEO
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -9,7 +9,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
|
||||
#include "command_queue_helpers_gen12lp.inl"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -11,7 +11,6 @@
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen8/hw_info.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen9/hw_cmds_base.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -145,7 +145,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();
|
||||
|
||||
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
|
||||
bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
if (executionModelKernel) {
|
||||
while (!devQueue->isEMCriticalSectionFree())
|
||||
@ -175,39 +174,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
|
||||
if (executionModelKernel) {
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
|
||||
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
|
||||
|
||||
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||
|
||||
scheduler.setArgs(devQueue->getQueueBuffer(),
|
||||
devQueue->getStackBuffer(),
|
||||
devQueue->getEventPoolBuffer(),
|
||||
devQueue->getSlbBuffer(),
|
||||
dsh->getGraphicsAllocation(),
|
||||
kernel->getKernelReflectionSurface(),
|
||||
devQueue->getQueueStorageBuffer(),
|
||||
ssh->getGraphicsAllocation(),
|
||||
devQueue->getDebugQueue());
|
||||
|
||||
devQueue->dispatchScheduler(
|
||||
*kernelOperation->commandStream,
|
||||
scheduler,
|
||||
preemptionMode,
|
||||
ssh,
|
||||
dsh,
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
// Update SLM usage
|
||||
slmUsed |= scheduler.getSlmTotalSize() > 0;
|
||||
|
||||
this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
|
||||
}
|
||||
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
|
||||
#include "cl_api_tests.h"
|
||||
|
||||
@ -51,88 +50,4 @@ struct clSetDefaultDeviceCommandQueueApiTest : public api_tests {
|
||||
cl_command_queue deviceQueue = nullptr;
|
||||
};
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenSettingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, deviceQueue);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(static_cast<_device_queue *>(deviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue()));
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenReplacingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE,
|
||||
0,
|
||||
0};
|
||||
auto pDevice = castToObject<ClDevice>(testedClDevice);
|
||||
|
||||
if (pDevice->getDeviceInfo().maxOnDeviceQueues > 1) {
|
||||
auto newDeviceQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal);
|
||||
ASSERT_NE(nullptr, newDeviceQueue);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, newDeviceQueue);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(static_cast<_device_queue *>(newDeviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue()));
|
||||
|
||||
clReleaseCommandQueue(newDeviceQueue);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullContextWhenSettingDefaultDeviceQueueThenClInvalidContextErrorIsReturned, DeviceEnqueueSupport) {
|
||||
retVal = clSetDefaultDeviceCommandQueue(nullptr, testedClDevice, deviceQueue);
|
||||
ASSERT_EQ(CL_INVALID_CONTEXT, retVal);
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceWhenSettingDefaultDeviceQueueThenClInvalidDeviceErrorIsReturned, DeviceEnqueueSupport) {
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, nullptr, deviceQueue);
|
||||
ASSERT_EQ(CL_INVALID_DEVICE, retVal);
|
||||
}
|
||||
|
||||
TEST_F(clSetDefaultDeviceCommandQueueApiTest, GivenDeviceNotSupportingDeviceEnqueueWhenSettingDefaultDeviceQueueThenClInvalidOperationErrorIsReturned) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.ForceDeviceEnqueueSupport.set(0);
|
||||
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, nullptr);
|
||||
ASSERT_EQ(CL_INVALID_OPERATION, retVal);
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, nullptr);
|
||||
ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenHostQueueAsDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, 0, 0, 0};
|
||||
cl_command_queue hostQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal);
|
||||
ASSERT_NE(nullptr, hostQueue);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, hostQueue);
|
||||
ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
||||
|
||||
retVal = clReleaseCommandQueue(hostQueue);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenIncorrectDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
auto context2 = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE,
|
||||
0,
|
||||
0};
|
||||
cl_command_queue deviceQueueCtx2 = clCreateCommandQueueWithProperties(context2, testedClDevice, properties, &retVal);
|
||||
ASSERT_NE(nullptr, deviceQueueCtx2);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, deviceQueueCtx2);
|
||||
ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
||||
|
||||
retVal = clReleaseCommandQueue(deviceQueueCtx2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clReleaseContext(context2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
} // namespace ULT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -16,13 +16,11 @@
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
||||
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
|
||||
#include "opencl/test/unit_test/helpers/cl_hw_parse.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include "reg_configs_common.h"
|
||||
@ -997,30 +995,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabled
|
||||
EXPECT_TRUE(pipeControl->getDcFlushEnable());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenMakeEnqueueBlocking, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
|
||||
|
||||
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
|
||||
cl_queue_properties queueProperties = {};
|
||||
auto mockDevQueue = std::make_unique<MockDeviceQueueHw<FamilyType>>(context, pClDevice, queueProperties);
|
||||
context->setDefaultDeviceQueue(mockDevQueue.get());
|
||||
|
||||
MockParentKernel::CreateParams createParams{};
|
||||
std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*context, createParams));
|
||||
parentKernel->initialize();
|
||||
|
||||
parentKernel->auxTranslationRequired = false;
|
||||
cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(0u, cmdQ.waitCalled);
|
||||
mockDevQueue->getIgilQueue()->m_controls.m_CriticalSection = 0;
|
||||
|
||||
parentKernel->auxTranslationRequired = true;
|
||||
cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(1u, cmdQ.waitCalled);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelButNoDeviceQueueWhenEnqueueIsCalledThenItReturnsInvalidOperation) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
|
||||
|
||||
|
@ -1,13 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(IGDRCL_SRCS_tests_device_queue
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_device_queue_info_tests.cpp
|
||||
)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device_queue})
|
@ -1,793 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace NEO;
|
||||
using namespace DeviceHostQueue;
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenQueueMatchesUnderlyingBuffer, DeviceEnqueueSupport) {
|
||||
// profiling disabled
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
auto deviceQueueHw = castToHwType<FamilyType>(deviceQueue);
|
||||
|
||||
auto expected = getExpectedgilCmdQueueAfterReset(deviceQueue);
|
||||
deviceQueueHw->resetDeviceQueue();
|
||||
|
||||
EXPECT_EQ(0, memcmp(deviceQueueHw->getQueueBuffer()->getUnderlyingBuffer(),
|
||||
&expected, sizeof(IGIL_CommandQueue)));
|
||||
|
||||
delete deviceQueue;
|
||||
|
||||
//profiling enabled
|
||||
deviceQueue = createQueueObject(deviceQueueProperties::minimumPropertiesWithProfiling);
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
deviceQueueHw = castToHwType<FamilyType>(deviceQueue);
|
||||
|
||||
expected = getExpectedgilCmdQueueAfterReset(deviceQueue);
|
||||
deviceQueueHw->resetDeviceQueue();
|
||||
|
||||
EXPECT_EQ(1u, expected.m_controls.m_IsProfilingEnabled);
|
||||
|
||||
EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(),
|
||||
&expected, sizeof(IGIL_CommandQueue)));
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenFirstStackElementAtValueOneDeviceEnqueueSupport, DeviceEnqueueSupport) {
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
auto deviceQueueHw = castToHwType<FamilyType>(deviceQueue);
|
||||
|
||||
deviceQueueHw->resetDeviceQueue();
|
||||
|
||||
auto stack = static_cast<uint32_t *>(deviceQueue->getStackBuffer()->getUnderlyingBuffer());
|
||||
stack += ((deviceQueue->getStackBuffer()->getUnderlyingBufferSize() / sizeof(uint32_t)) - 1);
|
||||
EXPECT_EQ(*stack, 1u); // first stack element in surface at value "1"
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcquiringEmCrticalSectionThenSectionIsNotAcquired, DeviceEnqueueSupport) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
||||
DebugManager.flags.EnableNullHardware.set(1);
|
||||
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
auto deviceQueueHw = castToHwType<FamilyType>(deviceQueue);
|
||||
|
||||
deviceQueueHw->acquireEMCriticalSection();
|
||||
|
||||
EXPECT_TRUE(deviceQueueHw->isEMCriticalSectionFree());
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeIsGreaterThanZero, DeviceEnqueueSupport) {
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
|
||||
deviceQueueProperties::minimumProperties[0]);
|
||||
|
||||
EXPECT_NE(0u, mockDeviceQueueHw->getCSPrefetchSize());
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
|
||||
deviceQueueProperties::minimumProperties[0]);
|
||||
|
||||
mockDeviceQueueHw->addLriCmd(true);
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*slbCS, 0);
|
||||
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor);
|
||||
|
||||
MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor;
|
||||
|
||||
EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset());
|
||||
EXPECT_EQ(0x100u, loadRegImm->getDataDword());
|
||||
|
||||
EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed());
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
|
||||
deviceQueueProperties::minimumProperties[0]);
|
||||
|
||||
mockDeviceQueueHw->addLriCmd(false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*slbCS, 0);
|
||||
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor);
|
||||
|
||||
MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor;
|
||||
|
||||
EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset());
|
||||
EXPECT_EQ(0u, loadRegImm->getDataDword());
|
||||
|
||||
EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed());
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, GivenDeviceQueueHWWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) {
|
||||
auto timestampResolution = static_cast<float>(device->getProfilingTimerResolution());
|
||||
|
||||
auto deviceQueue = std::unique_ptr<DeviceQueue>(createQueueObject());
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto eventPoolBuffer = reinterpret_cast<IGIL_EventPool *>(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution);
|
||||
}
|
||||
|
||||
class DeviceQueueSlb : public DeviceQueueHwTest {
|
||||
public:
|
||||
template <typename Cmd>
|
||||
void *compareCmds(void *position, Cmd &cmd) {
|
||||
EXPECT_EQ(0, memcmp(position, &cmd, sizeof(Cmd)));
|
||||
return ptrOffset(position, sizeof(Cmd));
|
||||
}
|
||||
void *compareCmdsWithSize(void *position, void *cmd, size_t size) {
|
||||
EXPECT_EQ(0, memcmp(position, cmd, size));
|
||||
return ptrOffset(position, size);
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSizeIsAllocated, DeviceEnqueueSupport) {
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
LinearStream *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
size_t expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128;
|
||||
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START);
|
||||
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
|
||||
|
||||
expectedSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
|
||||
expectedSize += (4 * MemoryConstants::pageSize);
|
||||
|
||||
EXPECT_LE(expectedSize, slbCS->getAvailableSpace());
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAreCorrect, DeviceEnqueueSupport) {
|
||||
auto mockDeviceQueueHw =
|
||||
new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
|
||||
auto mockDeviceQueueHwWithProfiling =
|
||||
new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumPropertiesWithProfiling[0]);
|
||||
|
||||
LinearStream *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
auto expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128;
|
||||
expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START);
|
||||
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
mockDeviceQueueHwWithProfiling->resetDeviceQueue();
|
||||
EXPECT_EQ(slbCS->getUsed(), expectedSize);
|
||||
EXPECT_EQ(mockDeviceQueueHwWithProfiling->getSlbCS()->getUsed(), expectedSize);
|
||||
|
||||
auto cmds = mockDeviceQueueHw->expectedCmds;
|
||||
auto cmdsWithProfiling = mockDeviceQueueHwWithProfiling->expectedCmds;
|
||||
|
||||
void *currCmd = slbCS->getCpuBase();
|
||||
void *currCmdWithProfiling = mockDeviceQueueHwWithProfiling->getSlbCS()->getCpuBase();
|
||||
for (size_t i = 0; i < 128; i++) {
|
||||
currCmd = compareCmds(currCmd, cmds.mediaStateFlush);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush);
|
||||
|
||||
if (mockDeviceQueueHw->arbCheckWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.arbCheck);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck);
|
||||
}
|
||||
if (mockDeviceQueueHw->miAtomicWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.miAtomic);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.miAtomic);
|
||||
}
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.mediaIdLoad);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaIdLoad);
|
||||
|
||||
if (mockDeviceQueueHw->lriWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.lriTrue);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriTrue);
|
||||
}
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl);
|
||||
|
||||
if (mockDeviceQueueHw->pipeControlWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl);
|
||||
}
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.gpgpuWalker);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.gpgpuWalker);
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.mediaStateFlush);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush);
|
||||
|
||||
if (mockDeviceQueueHw->arbCheckWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.arbCheck);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck);
|
||||
}
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.pipeControl);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl);
|
||||
|
||||
if (mockDeviceQueueHw->pipeControlWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.pipeControl);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl);
|
||||
}
|
||||
|
||||
if (mockDeviceQueueHw->lriWa) {
|
||||
currCmd = compareCmds(currCmd, cmds.lriFalse);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriFalse);
|
||||
}
|
||||
|
||||
currCmd = compareCmdsWithSize(currCmd, cmds.prefetch, DeviceQueueHw<FamilyType>::getCSPrefetchSize());
|
||||
currCmdWithProfiling = compareCmdsWithSize(currCmdWithProfiling, cmdsWithProfiling.prefetch, DeviceQueueHw<FamilyType>::getCSPrefetchSize());
|
||||
}
|
||||
|
||||
currCmd = compareCmds(currCmd, cmds.bbStart);
|
||||
currCmdWithProfiling = compareCmds(currCmdWithProfiling, mockDeviceQueueHwWithProfiling->expectedCmds.bbStart);
|
||||
|
||||
delete mockDeviceQueueHw;
|
||||
delete mockDeviceQueueHwWithProfiling;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect, DeviceEnqueueSupport) {
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
|
||||
deviceQueueProperties::minimumProperties[0]);
|
||||
|
||||
auto slb = mockDeviceQueueHw->getSlbBuffer();
|
||||
auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize();
|
||||
auto slbCopy = malloc(slb->getUnderlyingBufferSize());
|
||||
memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize());
|
||||
memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize());
|
||||
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
// slbEndOffset < commandsSize * 128
|
||||
// always fill only 1 enqueue (after offset)
|
||||
auto offset = static_cast<int>(commandsSize) * 50;
|
||||
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset;
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset
|
||||
EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset),
|
||||
slbCopy, commandsSize)); // change 1 enqueue
|
||||
EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize),
|
||||
slbCopy, offset)); // dont touch memory after (offset + 1 enqueue)
|
||||
compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128),
|
||||
mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always on the same place
|
||||
|
||||
// slbEndOffset == commandsSize * 128
|
||||
// dont fill commands
|
||||
memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize());
|
||||
offset = static_cast<int>(commandsSize) * 128;
|
||||
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast<int>(commandsSize);
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues
|
||||
compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128),
|
||||
mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always in the same place
|
||||
|
||||
delete mockDeviceQueueHw;
|
||||
free(slbCopy);
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsCorrect, DeviceEnqueueSupport) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
|
||||
auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize();
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
|
||||
uint32_t taskCount = 7;
|
||||
|
||||
mockDeviceQueueHw->buildSlbDummyCommands();
|
||||
uint64_t tagAddress = 0x123450000;
|
||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, nullptr, tagAddress, taskCount);
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize);
|
||||
size_t cleanupSectionOffsetToParse = cleanupSectionOffset;
|
||||
|
||||
size_t slbUsed = slbCS->getUsed();
|
||||
slbUsed = alignUp(slbUsed, MemoryConstants::pageSize);
|
||||
size_t slbMax = slbCS->getMaxAvailableSpace();
|
||||
|
||||
// 4 pages padding expected after cleanup section
|
||||
EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed);
|
||||
|
||||
if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
cleanupSectionOffsetToParse += GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2;
|
||||
}
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*slbCS, cleanupSectionOffsetToParse);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset;
|
||||
EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress);
|
||||
EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize);
|
||||
|
||||
auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), pipeControlItor);
|
||||
|
||||
bool tagWriteFound = false;
|
||||
while (auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*(++pipeControlItor))) {
|
||||
if (pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
if (tagAddress == NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControlCmd)) {
|
||||
tagWriteFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_TRUE(tagWriteFound);
|
||||
|
||||
auto bbEndItor = find<MI_BATCH_BUFFER_END *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), bbEndItor);
|
||||
MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor;
|
||||
uint64_t bbEndAddres = (uint64_t)bbEnd;
|
||||
|
||||
EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres);
|
||||
|
||||
delete mockParentKernel;
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmCleanupSectionIsAdded, DeviceEnqueueSupport) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
|
||||
auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize();
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
|
||||
uint32_t taskCount = 7;
|
||||
|
||||
auto hwTimeStamp = pCommandQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();
|
||||
mockDeviceQueueHw->buildSlbDummyCommands();
|
||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, 0x123, taskCount);
|
||||
|
||||
uint64_t eventTimestampAddr = igilCmdQueue->m_controls.m_EventTimestampAddress;
|
||||
uint64_t contextCompleteAddr = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS);
|
||||
EXPECT_EQ(contextCompleteAddr, eventTimestampAddr);
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize);
|
||||
size_t cleanupSectionOffsetToParse = cleanupSectionOffset;
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*slbCS, cleanupSectionOffsetToParse);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset;
|
||||
EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress);
|
||||
EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize);
|
||||
|
||||
auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
|
||||
if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) {
|
||||
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor);
|
||||
|
||||
pipeControlItor = find<PIPE_CONTROL *>(loadRegImmItor, hwParser.cmdList.end());
|
||||
pipeControlItor++;
|
||||
}
|
||||
|
||||
EXPECT_NE(hwParser.cmdList.end(), pipeControlItor);
|
||||
|
||||
PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor;
|
||||
EXPECT_NE(0u, pipeControl->getCommandStreamerStallEnable());
|
||||
|
||||
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(pipeControlItor, hwParser.cmdList.end());
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor);
|
||||
|
||||
MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor;
|
||||
|
||||
EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset());
|
||||
EXPECT_EQ(0u, loadRegImm->getDataDword());
|
||||
|
||||
pipeControlItor++;
|
||||
EXPECT_NE(hwParser.cmdList.end(), pipeControlItor);
|
||||
|
||||
auto bbEndItor = find<MI_BATCH_BUFFER_END *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), bbEndItor);
|
||||
MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor;
|
||||
uint64_t bbEndAddres = (uint64_t)bbEnd;
|
||||
|
||||
EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres);
|
||||
|
||||
delete mockParentKernel;
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBufferParamsAreCorrect, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(deviceQueue);
|
||||
|
||||
auto heap = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
ASSERT_NE(nullptr, heap);
|
||||
|
||||
auto dshBuffer = deviceQueue->getDshBuffer()->getUnderlyingBuffer();
|
||||
auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
auto size = heap->getAvailableSpace();
|
||||
auto heapMemory = heap->getCpuBase();
|
||||
|
||||
// ExecutionModel DSH is offseted by colorCalcState, ParentKernel Interface Descriptor Data is located in first table just after colorCalcState
|
||||
|
||||
EXPECT_EQ(dshBufferSize - DeviceQueue::colorCalcStateSize, size);
|
||||
EXPECT_EQ(dshBuffer, heapMemory);
|
||||
EXPECT_EQ(ptrOffset(dshBuffer, DeviceQueue::colorCalcStateSize), heap->getSpace(0));
|
||||
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOffsetIsCorrect, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(deviceQueue);
|
||||
|
||||
size_t offsetDsh = sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::interfaceDescriptorEntries * DeviceQueue::numberOfIDTables + DeviceQueue::colorCalcStateSize;
|
||||
|
||||
EXPECT_EQ(devQueueHw->getDshOffset(), offsetDsh);
|
||||
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture,
|
||||
public ::testing::WithParamInterface<std::tuple<const char *, const char *>> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
ExecutionModelKernelFixture::SetUp(std::get<0>(GetParam()), std::get<1>(GetParam()));
|
||||
cl_queue_properties properties[5] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
|
||||
0, 0, 0};
|
||||
cl_int errcodeRet = 0;
|
||||
|
||||
clDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())};
|
||||
device = &clDevice->getDevice();
|
||||
context = new MockContext();
|
||||
ASSERT_NE(nullptr, context);
|
||||
|
||||
devQueue = DeviceQueue::create(context, clDevice,
|
||||
*properties,
|
||||
errcodeRet);
|
||||
|
||||
ASSERT_NE(nullptr, devQueue);
|
||||
}
|
||||
void TearDown() override {
|
||||
if (IsSkipped()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (devQueue) {
|
||||
delete devQueue;
|
||||
}
|
||||
if (context) {
|
||||
delete context;
|
||||
}
|
||||
if (clDevice) {
|
||||
delete clDevice;
|
||||
}
|
||||
ExecutionModelKernelFixture::TearDown();
|
||||
}
|
||||
|
||||
Device *device = nullptr;
|
||||
ClDevice *clDevice = nullptr;
|
||||
DeviceQueue *devQueue = nullptr;
|
||||
MockContext *context = nullptr;
|
||||
};
|
||||
|
||||
HWTEST2_P(DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed, DeviceEnqueueSupport) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
auto usedBeforeSSH = ssh->getUsed();
|
||||
auto usedBeforeDSH = dsh->getUsed();
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
|
||||
auto usedAfterSSH = ssh->getUsed();
|
||||
auto usedAfterDSH = dsh->getUsed();
|
||||
|
||||
EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH);
|
||||
|
||||
EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
|
||||
HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet, DeviceEnqueueSupport) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
uint32_t parentCount = 4;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
|
||||
HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly, DeviceEnqueueSupport) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
MockContext mockContext;
|
||||
MockDeviceQueueHw<FamilyType> *devQueueHw = new MockDeviceQueueHw<FamilyType>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
|
||||
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
delete devQueueHw;
|
||||
}
|
||||
|
||||
HWTEST2_P(DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled, DeviceEnqueueSupport) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
MockContext mockContext;
|
||||
auto devQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto iddCount = blockManager->getCount();
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
const_cast<KernelDescriptor &>(blockManager->getBlockKernelInfo(i)->kernelDescriptor).kernelAttributes.barrierCount = 1U;
|
||||
}
|
||||
|
||||
auto surfaceStateHeapSize =
|
||||
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
|
||||
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
|
||||
auto iddStartIndex = parentCount;
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable());
|
||||
}
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
}
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(DeviceQueueHwWithKernel,
|
||||
DeviceQueueHwWithKernel,
|
||||
::testing::Combine(
|
||||
::testing::Values(binaryFile),
|
||||
::testing::ValuesIn(KernelNames)));
|
||||
|
||||
struct TheSimplestDeviceQueueFixture : testing::Test {
|
||||
void SetUp() override {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
}
|
||||
void TearDown() override {
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenResettingDeviceQueueThenEarlyReturnValuesAreSet, DeviceEnqueueSupport) {
|
||||
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
||||
DebugManager.flags.SchedulerSimulationReturnInstance.set(3);
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context;
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
|
||||
EXPECT_EQ(3u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturn);
|
||||
EXPECT_EQ(0u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturnCounter);
|
||||
}
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddihMediaStateClearCmdsThenCmdsAreAddedCorrectly, DeviceEnqueueSupport) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context;
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
|
||||
mockDeviceQueueHw->addMediaStateClearCmds();
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*slbCS, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), pipeControlItor);
|
||||
|
||||
if (mockDeviceQueueHw->pipeControlWa) {
|
||||
pipeControlItor++;
|
||||
EXPECT_NE(hwParser.cmdList.end(), pipeControlItor);
|
||||
}
|
||||
|
||||
PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor;
|
||||
EXPECT_TRUE(pipeControl->getGenericMediaStateClear());
|
||||
|
||||
auto mediaVfeStateItor = find<MEDIA_VFE_STATE *>(pipeControlItor, hwParser.cmdList.end());
|
||||
|
||||
EXPECT_NE(hwParser.cmdList.end(), mediaVfeStateItor);
|
||||
}
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddingExecutionModelCleanupSectionThenMediaStateIsCleared, DeviceEnqueueSupport) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
|
||||
|
||||
class MockDeviceQueueWithMediaStateClearRegistering : public MockDeviceQueueHw<FamilyType> {
|
||||
public:
|
||||
MockDeviceQueueWithMediaStateClearRegistering(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) : MockDeviceQueueHw<FamilyType>(context, device, properties) {
|
||||
}
|
||||
|
||||
bool addMediaStateClearCmdsCalled = false;
|
||||
void addMediaStateClearCmds() override {
|
||||
addMediaStateClearCmdsCalled = true;
|
||||
}
|
||||
};
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context(device.get());
|
||||
std::unique_ptr<MockDeviceQueueWithMediaStateClearRegistering> mockDeviceQueueHw(new MockDeviceQueueWithMediaStateClearRegistering(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(context));
|
||||
uint32_t taskCount = 7;
|
||||
mockDeviceQueueHw->buildSlbDummyCommands();
|
||||
|
||||
EXPECT_FALSE(mockDeviceQueueHw->addMediaStateClearCmdsCalled);
|
||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel.get(), nullptr, 0x123, taskCount);
|
||||
EXPECT_TRUE(mockDeviceQueueHw->addMediaStateClearCmdsCalled);
|
||||
}
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingMediaStateClearThenCmdsSizeIsCorrect, DeviceEnqueueSupport) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context;
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
size_t expectedSize = 2 * sizeof(PIPE_CONTROL) + sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
|
||||
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getMediaStateClearCmdsSize());
|
||||
}
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingExecutionModelCleanupThenSectionSizeIsCorrect, DeviceEnqueueSupport) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context;
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
size_t expectedSize = sizeof(PIPE_CONTROL) +
|
||||
2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_MATH) +
|
||||
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);
|
||||
|
||||
expectedSize += MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize();
|
||||
expectedSize += MockDeviceQueueHw<FamilyType>::getMediaStateClearCmdsSize();
|
||||
|
||||
expectedSize += 4 * sizeof(PIPE_CONTROL);
|
||||
expectedSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
|
||||
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize());
|
||||
}
|
||||
|
||||
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingProfilingEndThenCmdsSizeIsCorrect, DeviceEnqueueSupport) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockContext context;
|
||||
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(&context, device.get(), deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM);
|
||||
|
||||
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getProfilingEndCmdsSize());
|
||||
}
|
@ -1,299 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/device/device_info.h"
|
||||
#include "shared/test/common/test_macros/matchers.h"
|
||||
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
using namespace NEO;
|
||||
using namespace DeviceHostQueue;
|
||||
|
||||
using DeviceQueueSimpleTest = ::testing::Test;
|
||||
|
||||
HWTEST2_F(DeviceQueueSimpleTest, WhenExecutionModelDispatchIsSetupThenNoAdditionalActionsOccur, DeviceEnqueueSupport) {
|
||||
DeviceQueue devQueue;
|
||||
char buffer[20];
|
||||
|
||||
memset(buffer, 1, 20);
|
||||
|
||||
size_t size = 20;
|
||||
IndirectHeap ssh(buffer, size);
|
||||
IndirectHeap dsh(buffer, size);
|
||||
devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0, false);
|
||||
|
||||
EXPECT_EQ(0u, ssh.getUsed());
|
||||
|
||||
for (uint32_t i = 0; i < 20; i++) {
|
||||
EXPECT_EQ(1, buffer[i]);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueSimpleTest, WhenResettingDeviceQueueThenIndirectHeapIsNotUsed, DeviceEnqueueSupport) {
|
||||
DeviceQueue devQueue;
|
||||
devQueue.resetDeviceQueue();
|
||||
EXPECT_EQ(nullptr, devQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||
}
|
||||
|
||||
class DeviceQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
|
||||
public:
|
||||
using BaseClass = DeviceHostQueueFixture<DeviceQueue>;
|
||||
void SetUp() override {
|
||||
BaseClass::SetUp();
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pContext);
|
||||
device = pContext->getDevice(0);
|
||||
|
||||
ASSERT_NE(device, nullptr);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
BaseClass::TearDown();
|
||||
}
|
||||
|
||||
void checkQueueBuffer(cl_uint expedtedSize) {
|
||||
auto alignedExpectedSize = alignUp(expedtedSize, MemoryConstants::pageSize);
|
||||
EXPECT_EQ(deviceQueue->getQueueSize(), expedtedSize);
|
||||
ASSERT_NE(deviceQueue->getQueueBuffer(), nullptr);
|
||||
EXPECT_EQ(deviceQueue->getQueueBuffer()->getUnderlyingBufferSize(), alignedExpectedSize);
|
||||
}
|
||||
|
||||
DeviceQueue *deviceQueue;
|
||||
ClDevice *device;
|
||||
};
|
||||
|
||||
HWTEST2_F(DeviceQueueTest, GivenDeviceQueueCapWhenCreatingAdditionalDeviceQueuesThenQueueIsNotCreated, DeviceEnqueueSupport) {
|
||||
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = 1;
|
||||
|
||||
auto deviceQueue1 = createQueueObject();
|
||||
ASSERT_NE(deviceQueue1, nullptr);
|
||||
EXPECT_EQ(deviceQueue1->getReference(), 1);
|
||||
|
||||
auto deviceQueue2 = createQueueObject();
|
||||
EXPECT_EQ(deviceQueue2, nullptr);
|
||||
|
||||
delete deviceQueue1;
|
||||
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) {
|
||||
auto timestampResolution = static_cast<float>(device->getProfilingTimerResolution());
|
||||
|
||||
auto deviceQueue = std::unique_ptr<DeviceQueue>(createQueueObject());
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto eventPoolBuffer = reinterpret_cast<IGIL_EventPool *>(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution);
|
||||
}
|
||||
|
||||
typedef DeviceQueueTest DeviceQueueBuffer;
|
||||
|
||||
HWTEST2_F(DeviceQueueBuffer, GivenNoPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) {
|
||||
auto &deviceInfo = device->getDeviceInfo();
|
||||
deviceQueue = createQueueObject(); // only minimal properties
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize);
|
||||
deviceQueue->release();
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueBuffer, GivenInvalidPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) {
|
||||
cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1],
|
||||
CL_QUEUE_SIZE, 0, 0};
|
||||
auto &deviceInfo = device->getDeviceInfo();
|
||||
|
||||
deviceQueue = createQueueObject(properties); // zero size
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize);
|
||||
delete deviceQueue;
|
||||
|
||||
properties[3] = static_cast<cl_queue_properties>(deviceInfo.queueOnDeviceMaxSize + 1);
|
||||
deviceQueue = createQueueObject(properties); // greater than max
|
||||
EXPECT_EQ(deviceQueue, nullptr);
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueBuffer, GivenValidSizeWhenCreatingQueueThenProvidedSizeIsSet, DeviceEnqueueSupport) {
|
||||
auto &deviceInfo = device->getDeviceInfo();
|
||||
cl_uint validSize = deviceInfo.queueOnDevicePreferredSize - 1;
|
||||
cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1],
|
||||
CL_QUEUE_SIZE, static_cast<cl_queue_properties>(validSize),
|
||||
0};
|
||||
|
||||
EXPECT_NE(validSize, alignUp(validSize, MemoryConstants::pageSize)); // create aligned
|
||||
deviceQueue = createQueueObject(properties);
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
checkQueueBuffer(validSize);
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueBuffer, WhenDeviceQueueIsCreatedThenItIsCorrectlyInitialized, DeviceEnqueueSupport) {
|
||||
auto &deviceInfo = device->getDeviceInfo();
|
||||
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
IGIL_CommandQueue expectedIgilCmdQueue = getExpectedInitIgilCmdQueue(deviceQueue);
|
||||
EXPECT_EQ(static_cast<uint32_t>(deviceQueue->isProfilingEnabled()), expectedIgilCmdQueue.m_controls.m_IsProfilingEnabled);
|
||||
|
||||
IGIL_EventPool expectedIgilEventPool = {0, 0, 0};
|
||||
expectedIgilEventPool.m_head = 0;
|
||||
expectedIgilEventPool.m_size = deviceInfo.maxOnDeviceEvents;
|
||||
expectedIgilEventPool.m_TimestampResolution = static_cast<float>(device->getProfilingTimerResolution());
|
||||
|
||||
// initialized header
|
||||
EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(),
|
||||
&expectedIgilCmdQueue, sizeof(IGIL_CommandQueue)));
|
||||
|
||||
EXPECT_EQ(0, memcmp(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer(),
|
||||
&expectedIgilEventPool, sizeof(IGIL_EventPool)));
|
||||
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
typedef DeviceQueueTest DeviceQueueStackBuffer;
|
||||
|
||||
HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenAllocatedResourcesAreZeroed, DeviceEnqueueSupport) {
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
EXPECT_THAT(deviceQueue->getQueueStorageBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize()));
|
||||
EXPECT_THAT(deviceQueue->getStackBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getStackBuffer()->getUnderlyingBufferSize()));
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenStackBufferIsAllocated, DeviceEnqueueSupport) {
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto maxEnqueue = deviceQueue->getQueueSize() / sizeof(IGIL_CommandHeader);
|
||||
//stack can hold at most 3 full loads of commands
|
||||
auto expectedStackSize = maxEnqueue * sizeof(uint32_t) * 3;
|
||||
expectedStackSize = alignUp(expectedStackSize, MemoryConstants::pageSize);
|
||||
|
||||
ASSERT_NE(deviceQueue->getStackBuffer(), nullptr);
|
||||
EXPECT_EQ(deviceQueue->getStackBuffer()->getUnderlyingBufferSize(), expectedStackSize);
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
typedef DeviceQueueTest DeviceQueueStorageBuffer;
|
||||
|
||||
HWTEST2_F(DeviceQueueStorageBuffer, WhenDeviceQueueIsCreatedThenStorageBufferIsAllocated, DeviceEnqueueSupport) {
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
auto expectedStorageSize = deviceQueue->getQueueBuffer()->getUnderlyingBufferSize() * 2;
|
||||
expectedStorageSize = alignUp(expectedStorageSize, MemoryConstants::pageSize);
|
||||
|
||||
ASSERT_NE(deviceQueue->getQueueStorageBuffer(), nullptr);
|
||||
EXPECT_EQ(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize(), expectedStorageSize);
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
typedef DeviceQueueTest DefaultDeviceQueue;
|
||||
|
||||
HWTEST2_F(DefaultDeviceQueue, GivenSingleDeviceQueueIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) {
|
||||
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0};
|
||||
|
||||
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = 1;
|
||||
|
||||
auto deviceQueue1 = createQueueObject(properties);
|
||||
ASSERT_NE(deviceQueue1, nullptr);
|
||||
|
||||
EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1);
|
||||
EXPECT_EQ(deviceQueue1->getReference(), 1);
|
||||
|
||||
auto deviceQueue2 = createQueueObject(properties);
|
||||
ASSERT_NE(deviceQueue2, nullptr);
|
||||
|
||||
EXPECT_EQ(deviceQueue2, deviceQueue1);
|
||||
|
||||
EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1);
|
||||
EXPECT_EQ(deviceQueue1->getReference(), 2);
|
||||
|
||||
deviceQueue1->release();
|
||||
deviceQueue2->release();
|
||||
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues;
|
||||
}
|
||||
|
||||
HWTEST2_F(DefaultDeviceQueue, GivenMultipleDeviceQueuesIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) {
|
||||
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0};
|
||||
|
||||
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = 2;
|
||||
|
||||
auto deviceQueue1 = createQueueObject(properties);
|
||||
ASSERT_NE(deviceQueue1, nullptr);
|
||||
|
||||
EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1);
|
||||
EXPECT_EQ(deviceQueue1->getReference(), 1);
|
||||
|
||||
auto deviceQueue2 = createQueueObject(properties);
|
||||
ASSERT_NE(deviceQueue2, nullptr);
|
||||
|
||||
EXPECT_EQ(deviceQueue2, deviceQueue1);
|
||||
|
||||
EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1);
|
||||
EXPECT_EQ(deviceQueue1->getReference(), 2);
|
||||
|
||||
deviceQueue1->release();
|
||||
deviceQueue2->release();
|
||||
|
||||
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues;
|
||||
}
|
||||
|
||||
typedef DeviceQueueTest DeviceQueueEventPool;
|
||||
|
||||
HWTEST2_F(DeviceQueueEventPool, WhenDeviceQueueIsCreatedThenEventPoolBufferIsAllocated, DeviceEnqueueSupport) {
|
||||
auto &deviceInfo = device->getDeviceInfo();
|
||||
|
||||
// number of events + event pool representation
|
||||
auto expectedSize = static_cast<uint32_t>(deviceInfo.maxOnDeviceEvents * sizeof(IGIL_DeviceEvent) +
|
||||
sizeof(IGIL_EventPool));
|
||||
expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);
|
||||
|
||||
auto deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
ASSERT_NE(deviceQueue->getEventPoolBuffer(), nullptr);
|
||||
EXPECT_EQ(deviceQueue->getEventPoolBuffer()->getUnderlyingBufferSize(), expectedSize);
|
||||
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBufferIsAllocated, DeviceEnqueueSupport) {
|
||||
deviceQueue = createQueueObject();
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
|
||||
ASSERT_NE(deviceQueue->getDshBuffer(), nullptr);
|
||||
auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
EXPECT_LE(761856u, dshBufferSize);
|
||||
delete deviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(DeviceQueueTest, WhenDispatchingSchedulerThenNoAssertsOccur, DeviceEnqueueSupport) {
|
||||
DeviceQueue devQueue;
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
|
||||
KernelInfo info;
|
||||
MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device);
|
||||
LinearStream cmdStream;
|
||||
|
||||
devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false);
|
||||
delete kernel;
|
||||
}
|
@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
|
||||
using namespace NEO;
|
||||
using namespace DeviceHostQueue;
|
||||
|
||||
class GetDeviceQueueInfoTest : public DeviceHostQueueFixture<DeviceQueue> {
|
||||
public:
|
||||
using BaseClass = DeviceHostQueueFixture<DeviceQueue>;
|
||||
|
||||
void SetUp() override {
|
||||
BaseClass::SetUp();
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pContext);
|
||||
deviceQueue = createQueueObject(deviceQueueProperties::allProperties);
|
||||
ASSERT_NE(deviceQueue, nullptr);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (deviceQueue)
|
||||
delete deviceQueue;
|
||||
BaseClass::TearDown();
|
||||
}
|
||||
|
||||
DeviceQueue *deviceQueue = nullptr;
|
||||
};
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueContextWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_context contextReturned = nullptr;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
CL_QUEUE_CONTEXT,
|
||||
sizeof(contextReturned),
|
||||
&contextReturned,
|
||||
nullptr);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ((cl_context)pContext, contextReturned);
|
||||
}
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_device_id deviceExpected = testedClDevice;
|
||||
cl_device_id deviceIdReturned = nullptr;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
CL_QUEUE_DEVICE,
|
||||
sizeof(deviceIdReturned),
|
||||
&deviceIdReturned,
|
||||
nullptr);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(deviceExpected, deviceIdReturned);
|
||||
}
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueuePropertiesWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_command_queue_properties propertiesReturned = 0;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
CL_QUEUE_PROPERTIES,
|
||||
sizeof(propertiesReturned),
|
||||
&propertiesReturned,
|
||||
nullptr);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(deviceQueueProperties::allProperties[1], propertiesReturned);
|
||||
}
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueSizeWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_uint queueSizeReturned = 0;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
CL_QUEUE_SIZE,
|
||||
sizeof(queueSizeReturned),
|
||||
&queueSizeReturned,
|
||||
nullptr);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(deviceQueue->getQueueSize(), queueSizeReturned);
|
||||
}
|
||||
|
||||
// OCL 2.1
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceDefaultWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
|
||||
cl_command_queue commandQueueReturned = nullptr;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
CL_QUEUE_DEVICE_DEFAULT,
|
||||
sizeof(commandQueueReturned),
|
||||
&commandQueueReturned,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// 1 device queue is supported which is default
|
||||
EXPECT_EQ(deviceQueue, commandQueueReturned);
|
||||
}
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, WhenGettingDeviceQueueInfoThenProfilingIsEnabled, DeviceEnqueueSupport) {
|
||||
EXPECT_TRUE(deviceQueue->isProfilingEnabled());
|
||||
}
|
||||
|
||||
HWTEST2_F(GetDeviceQueueInfoTest, GivenInvalidParamWhenGettingDeviceQueueInfoThenInvalidValueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
uint32_t tempValue = 0;
|
||||
|
||||
retVal = deviceQueue->getCommandQueueInfo(
|
||||
static_cast<cl_command_queue_info>(0),
|
||||
sizeof(tempValue),
|
||||
&tempValue,
|
||||
nullptr);
|
||||
EXPECT_EQ(tempValue, 0u);
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(IGDRCL_SRCS_tests_execution_model
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_execution_model_kernel_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_dispatch_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_dispatch_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/submit_blocked_parent_kernel_tests.cpp
|
||||
)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_execution_model})
|
@ -1,621 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
#include "shared/source/helpers/per_thread_data.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/helpers/gtest_helpers.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
|
||||
|
||||
typedef ExecutionModelKernelTest ParentKernelEnqueueTest;
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
||||
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
|
||||
|
||||
EXPECT_NE(nullptr, executionModelDsh);
|
||||
|
||||
INTERFACE_DESCRIPTOR_DATA *idData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize));
|
||||
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
|
||||
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
|
||||
|
||||
auto &hardwareInfo = pClDevice->getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
kernelIsaAddress += pKernel->getKernelInfo().kernelDescriptor.entryPoints.skipSetFFIDGP;
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") {
|
||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
EXPECT_NE(0u, idData[0].getSamplerCount());
|
||||
} else {
|
||||
EXPECT_EQ(0u, idData[0].getSamplerCount());
|
||||
}
|
||||
EXPECT_NE(0u, idData[0].getSamplerStatePointer());
|
||||
}
|
||||
|
||||
EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress), idData[0].getKernelStartPointer());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh());
|
||||
|
||||
const uint32_t blockFirstIndex = 1;
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
|
||||
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
|
||||
|
||||
const uint32_t sizeCrossThreadData = pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize / grfSize;
|
||||
|
||||
auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels;
|
||||
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), grfSize, numChannels);
|
||||
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
||||
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
||||
|
||||
EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer());
|
||||
|
||||
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
|
||||
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
auto &hardwareInfo = pClDevice->getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
expectedBlockKernelAddress += pBlockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP;
|
||||
}
|
||||
|
||||
EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident, DeviceEnqueueSupport) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateSurface == nullptr) {
|
||||
privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateAllocation == nullptr) {
|
||||
privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
|
||||
EXPECT_FALSE(csr.isMadeResident(privateAllocation));
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident, DeviceEnqueueSupport) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident, DeviceEnqueueSupport) {
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
blockKernelManager->makeInternalAllocationsResident(csr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
size_t executionModelDSHUsedBefore = dsh->getUsed();
|
||||
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
size_t executionModelDSHUsedAfterFirst = dsh->getUsed();
|
||||
EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst);
|
||||
|
||||
pDevQueueHw->resetDeviceQueue();
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
size_t executionModelDSHUsedAfterSecond = dsh->getUsed();
|
||||
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated, DeviceEnqueueSupport) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
auto ssh2 = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
EXPECT_EQ(ssh, ssh2);
|
||||
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied, DeviceEnqueueSupport) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
// prealign the ssh so that it won't need to be realigned in enqueueKernel
|
||||
// this way, we can assume the location in memory into which the surface states
|
||||
// will be coies
|
||||
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
// mark the assumed place for surface states
|
||||
size_t parentSshOffset = 0;
|
||||
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
|
||||
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
|
||||
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, *pClDevice, nullptr);
|
||||
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
if (blockKernel->getNumberOfBindingTableStates() > 0) {
|
||||
ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);
|
||||
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
||||
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);
|
||||
|
||||
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);
|
||||
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
|
||||
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
|
||||
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
|
||||
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
|
||||
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer));
|
||||
auto *srcSurfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer));
|
||||
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
|
||||
}
|
||||
|
||||
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
|
||||
}
|
||||
|
||||
delete blockKernel;
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenReflectionSurfaceIsCreated, DeviceEnqueueSupport) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset, DeviceEnqueueSupport) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
cl_queue_properties properties[3] = {0};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, pClDevice, properties[0]);
|
||||
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
// Acquire CS to check if reset queue was called
|
||||
mockDevQueue.acquireEMCriticalSection();
|
||||
|
||||
auto mockEvent = make_releaseable<UserEvent>(context);
|
||||
|
||||
cl_event eventBlocking = mockEvent.get();
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr);
|
||||
|
||||
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
|
||||
}
|
||||
|
||||
HWTEST2_P(ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident, DeviceEnqueueSupport) {
|
||||
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
ASSERT_NE(nullptr, pDevQueueHw);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsrBase<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
auto &cmdStream = mockCSR->getCS(0);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
auto stateBaseAddressItor = hwParser.itorStateBaseAddress;
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor);
|
||||
|
||||
auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor;
|
||||
|
||||
uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress();
|
||||
|
||||
EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress());
|
||||
|
||||
bool dshAllocationResident = false;
|
||||
|
||||
for (auto allocation : mockCSR->madeResidentGfxAllocations) {
|
||||
if (allocation == pDevQueue->getDshBuffer()) {
|
||||
dshAllocationResident = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
EXPECT_TRUE(dshAllocationResident);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest,
|
||||
ParentKernelEnqueueTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(binaryFile),
|
||||
::testing::ValuesIn(KernelNames)));
|
||||
|
||||
class ParentKernelEnqueueFixture : public ExecutionModelSchedulerTest,
|
||||
public testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ExecutionModelSchedulerTest::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ExecutionModelSchedulerTest::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenDefaultDeviceQueueAndEventPoolIsPatched, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
const auto &implicitArgs = parentKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs;
|
||||
|
||||
const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
|
||||
if (isValidOffset(defaultQueueSurfaceAddress.stateless)) {
|
||||
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData()), defaultQueueSurfaceAddress.stateless);
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation);
|
||||
}
|
||||
|
||||
const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
|
||||
if (isValidOffset(eventPoolSurfaceAddress.stateless)) {
|
||||
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData()), eventPoolSurfaceAddress.stateless);
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenBlocksDSHOnReflectionSurfaceArePatchedWithDeviceQueueAndEventPoolAddresses, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
void *reflectionSurface = parentKernel->getKernelReflectionSurface()->getUnderlyingBuffer();
|
||||
|
||||
BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const auto implicitArgs = blockManager->getBlockKernelInfo(i)->kernelDescriptor.payloadMappings.implicitArgs;
|
||||
const uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i);
|
||||
|
||||
const auto &defaultQueue = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
|
||||
if (defaultQueue.pointerSize == sizeof(uint64_t)) {
|
||||
EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless));
|
||||
} else {
|
||||
EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless));
|
||||
}
|
||||
|
||||
const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
|
||||
if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) {
|
||||
EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless));
|
||||
} else {
|
||||
EXPECT_EQ((uint32_t)pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenDeviceQueueCriticalSetionIsAcquired, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree());
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_FALSE(pDevQueueHw->isEMCriticalSectionFree());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToBlockedQueueThenDeviceQueueCriticalSetionIsNotAcquired, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
auto mockEvent = make_releaseable<MockEvent<UserEvent>>(context);
|
||||
cl_event eventBlocking = mockEvent.get();
|
||||
|
||||
EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree());
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 1, &eventBlocking, nullptr);
|
||||
|
||||
EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree());
|
||||
mockEvent->setStatus(-1);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenFlushCsrWithSlm, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t execStamp;
|
||||
auto mockCsr = new MockCsr<FamilyType>(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockCsr->slmUsedInLastFlushTask);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedWithSchedulerReturnInstanceThenRunSimulation, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.SchedulerSimulationReturnInstance.set(1);
|
||||
|
||||
MockDeviceQueueHw<FamilyType> *mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(context, pClDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
|
||||
context->setDefaultDeviceQueue(mockDeviceQueueHw);
|
||||
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t execStamp;
|
||||
auto mockCsr = new MockCsr<FamilyType>(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<FamilyType>::enabled = false;
|
||||
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<FamilyType>::enabled = true;
|
||||
|
||||
EXPECT_TRUE(BuiltinKernelsSimulation::SchedulerSimulation<FamilyType>::simulationRun);
|
||||
delete mockDeviceQueueHw;
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKernelIsSubmittedThenItIsFlushed, DeviceEnqueueSupport) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
|
||||
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
|
||||
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
MockContext context(pClDevice);
|
||||
MockParentKernel::CreateParams createParams{};
|
||||
std::unique_ptr<MockParentKernel> kernelToRun(MockParentKernel::create(context, createParams));
|
||||
|
||||
pCmdQ->enqueueKernel(kernelToRun.get(), 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
|
||||
EXPECT_EQ(1, mockCsr->flushCalledCount);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenMarkCsrMediaVfeStateDirty, DeviceEnqueueSupport) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t execStamp;
|
||||
auto mockCsr = new MockCsr<FamilyType>(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
mockCsr->setMediaVFEStateDirty(false);
|
||||
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockCsr->peekMediaVfeStateDirty());
|
||||
}
|
||||
}
|
@ -1,416 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/perf_counter.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
|
||||
#include "opencl/source/command_queue/enqueue_kernel.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/sampler/sampler.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
|
||||
|
||||
typedef ExecutionModelKernelTest ParentKernelDispatchTest;
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedBefore);
|
||||
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedAfter);
|
||||
|
||||
size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
EXPECT_EQ(ishOfDevQueue, ish);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
|
||||
auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
EXPECT_EQ(dshOfDevQueue, ioh);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) {
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
auto iohUsed = ioh.getUsed();
|
||||
EXPECT_EQ(0u, iohUsed);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) {
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
|
||||
DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
|
||||
|
||||
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
|
||||
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
|
||||
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
ASSERT_NE(nullptr, blockedCommandsData);
|
||||
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
|
||||
size_t sshUsed = blockedCommandsData->ssh->getUsed();
|
||||
|
||||
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
|
||||
pKernel->getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) +
|
||||
UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
|
||||
if (pKernel->usesBindfulAddressingForBuffers() || pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesImages) {
|
||||
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
||||
}
|
||||
|
||||
EXPECT_GE(minRequiredSize, sshUsed);
|
||||
// Total SSH size including EM must be greater then ssh allocated
|
||||
EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest,
|
||||
ParentKernelDispatchTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(binaryFile),
|
||||
::testing::ValuesIn(KernelNames)));
|
||||
|
||||
typedef ParentKernelCommandQueueFixture ParentKernelCommandStreamFixture;
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhenCommandStreamIsAcquiredThenSizeAccountsForSchedulerDispatch) {
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
|
||||
|
||||
DispatchInfo dispatchInfo(device, mockParentKernel, 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
|
||||
MultiDispatchInfo multiDispatchInfo(mockParentKernel);
|
||||
|
||||
size_t size = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel, {});
|
||||
size_t numOfKernels = MemoryConstants::pageSize / size;
|
||||
|
||||
size_t rest = MemoryConstants::pageSize - (numOfKernels * size);
|
||||
|
||||
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||
size_t schedulerSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler, {});
|
||||
|
||||
while (rest >= schedulerSize) {
|
||||
numOfKernels++;
|
||||
rest = alignUp(numOfKernels * size, MemoryConstants::pageSize) - numOfKernels * size;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < numOfKernels; i++) {
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
}
|
||||
|
||||
size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize);
|
||||
|
||||
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false,
|
||||
false, multiDispatchInfo, nullptr, 0, false, false);
|
||||
|
||||
EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace());
|
||||
|
||||
delete mockParentKernel;
|
||||
}
|
||||
|
||||
class MockParentKernelDispatch : public ExecutionModelSchedulerTest,
|
||||
public testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableTimestampPacket.set(0);
|
||||
ExecutionModelSchedulerTest::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ExecutionModelSchedulerTest::TearDown();
|
||||
}
|
||||
|
||||
std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
|
||||
auto commandStream = new LinearStream();
|
||||
|
||||
auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);
|
||||
|
||||
return std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
|
||||
}
|
||||
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
};
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(mockParentKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
ASSERT_NE(nullptr, blockedCommandsData);
|
||||
|
||||
EXPECT_EQ(blockedCommandsData->dsh.get(), blockedCommandsData->ioh.get());
|
||||
delete mockParentKernel;
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispatchedThenMediaInterfaceDescriptorLoadIsCorrectlyProgrammed) {
|
||||
typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
|
||||
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(mockParentKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
LinearStream *commandStream = &pCmdQ->getCS(0);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(*commandStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
|
||||
|
||||
auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)hwParser.getCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD>(hwParser.cmdList.begin(), hwParser.itorWalker);
|
||||
|
||||
ASSERT_NE(nullptr, pCmd);
|
||||
|
||||
uint32_t offsetInterfaceDescriptorData = DeviceQueue::colorCalcStateSize;
|
||||
uint32_t sizeInterfaceDescriptorData = sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
|
||||
EXPECT_EQ(offsetInterfaceDescriptorData, pCmd->getInterfaceDescriptorDataStartAddress());
|
||||
EXPECT_EQ(sizeInterfaceDescriptorData, pCmd->getInterfaceDescriptorTotalLength());
|
||||
|
||||
delete mockParentKernel;
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedThenNewSSHIsAllocated) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
|
||||
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||
|
||||
uint32_t testSshUse = 20u;
|
||||
uint32_t expectedSshUse = testSshUse + UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
ssh.getSpace(testSshUse);
|
||||
EXPECT_EQ(expectedSshUse, ssh.getUsed());
|
||||
|
||||
// Assuming parent is not using SSH, this is becuase storing allocation on reuse list and allocating
|
||||
// new one by obtaining from reuse list returns the same allocation and heap buffer does not differ
|
||||
// If parent is not using SSH, then heap obtained has zero usage and the same buffer
|
||||
ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize);
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(mockParentKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_EQ(0u, ssh.getUsed());
|
||||
|
||||
delete mockParentKernel;
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatchedThenExistingSSHIsUsed) {
|
||||
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
|
||||
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||
auto defaultSshUsage = UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
EXPECT_EQ(defaultSshUsage, ssh.getUsed());
|
||||
|
||||
auto *bufferMemory = ssh.getCpuBase();
|
||||
|
||||
DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_EQ(bufferMemory, ssh.getCpuBase());
|
||||
|
||||
delete mockParentKernel;
|
||||
}
|
||||
}
|
@ -1,249 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include "opencl/source/command_queue/enqueue_kernel.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest,
|
||||
public testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ExecutionModelSchedulerTest::SetUp();
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ExecutionModelSchedulerTest::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenProgrammingIsCorrect, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
auto &scheduler = static_cast<MockSchedulerKernel &>(context->getSchedulerKernel());
|
||||
|
||||
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
||||
auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
|
||||
|
||||
EXPECT_NE(nullptr, executionModelDsh);
|
||||
|
||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
// Setup heaps in pCmdQ
|
||||
MultiDispatchInfo multiDispatchinfo(&scheduler);
|
||||
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
|
||||
false, false, false, multiDispatchinfo,
|
||||
nullptr, 0, false, false);
|
||||
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
|
||||
|
||||
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
|
||||
pCmdQ->getCS(0),
|
||||
*pDevQueueHw,
|
||||
pDevice->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
auto localWorkSize = scheduler.getLocalWorkSizeValues();
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *localWorkSize[0]);
|
||||
EXPECT_EQ(1u, *localWorkSize[1]);
|
||||
EXPECT_EQ(1u, *localWorkSize[2]);
|
||||
|
||||
auto enqueuedLocalWorkSize = scheduler.getEnqueuedLocalWorkSizeValues();
|
||||
if (enqueuedLocalWorkSize[0] != &Kernel::dummyPatchLocation) {
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *enqueuedLocalWorkSize[0]);
|
||||
EXPECT_EQ(1u, *enqueuedLocalWorkSize[1]);
|
||||
EXPECT_EQ(1u, *enqueuedLocalWorkSize[2]);
|
||||
}
|
||||
|
||||
auto numWorkGroups = scheduler.getNumWorkGroupsValues();
|
||||
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *numWorkGroups[0]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[1]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[2]);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
|
||||
|
||||
// Before Walker There must be PC
|
||||
PIPE_CONTROL *pc = hwParser.getCommand<PIPE_CONTROL>(hwParser.cmdList.begin(), hwParser.itorWalker);
|
||||
ASSERT_NE(nullptr, pc);
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
|
||||
auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad;
|
||||
|
||||
uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress();
|
||||
uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength();
|
||||
|
||||
uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize;
|
||||
uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
|
||||
EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed);
|
||||
EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed);
|
||||
|
||||
auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker;
|
||||
|
||||
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
|
||||
|
||||
size_t numWorkgroupsProgrammed[3] = {0, 0, 0};
|
||||
|
||||
uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum();
|
||||
|
||||
EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup);
|
||||
|
||||
numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension();
|
||||
numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension();
|
||||
numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension();
|
||||
|
||||
EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]);
|
||||
EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]);
|
||||
EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]);
|
||||
|
||||
typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize();
|
||||
EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize);
|
||||
|
||||
EXPECT_EQ(0u, walker->getThreadGroupIdStartingX());
|
||||
EXPECT_EQ(0u, walker->getThreadGroupIdStartingY());
|
||||
EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ());
|
||||
|
||||
uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress();
|
||||
assert(offsetCrossThreadDataProgrammed % 64 == 0);
|
||||
size_t curbeSize = scheduler.getCurbeSize();
|
||||
size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account
|
||||
EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed);
|
||||
|
||||
EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset());
|
||||
|
||||
auto numChannels = 3;
|
||||
auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize;
|
||||
auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws());
|
||||
|
||||
auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
|
||||
auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength());
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
|
||||
auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker;
|
||||
|
||||
uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress();
|
||||
EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenStandardCmdqIohIsNotUsed, DeviceEnqueueSupport) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||
|
||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
// Setup heaps in pCmdQ
|
||||
|
||||
MultiDispatchInfo multiDispatchinfo(&scheduler);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
|
||||
nullptr, 0, false, false);
|
||||
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
|
||||
|
||||
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
|
||||
pCmdQ->getCS(0),
|
||||
*pDevQueueHw,
|
||||
pDevice->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
EXPECT_EQ(0u, ioh.getUsed());
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, GivenEarlyReturnSetToFirstInstanceWhenDispatchingSchedulerThenBbStartCmdIsNotInserted, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
|
||||
auto *igilQueue = mockDevQueue.getIgilQueue();
|
||||
|
||||
ASSERT_NE(nullptr, igilQueue);
|
||||
igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
|
||||
|
||||
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||
|
||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
|
||||
// Setup heaps in pCmdQ
|
||||
MultiDispatchInfo multiDispatchinfo(&scheduler);
|
||||
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
|
||||
false, false, false, multiDispatchinfo,
|
||||
nullptr, 0, false, false);
|
||||
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
|
||||
|
||||
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
|
||||
pCmdQ->getCS(0),
|
||||
mockDevQueue,
|
||||
device->getPreemptionMode(),
|
||||
scheduler,
|
||||
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
|
||||
EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
|
||||
}
|
||||
|
||||
HWTEST2_F(ExecutionModelSchedulerFixture, WhenForceDispatchingSchedulerThenSchedulerKernelIsEnqueued, DeviceEnqueueSupport) {
|
||||
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
||||
DebugManager.flags.ForceDispatchScheduler.set(true);
|
||||
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
MockCommandQueueHw<FamilyType> *mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
|
||||
|
||||
mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel);
|
||||
|
||||
delete mockCmdQ;
|
||||
}
|
@ -1,452 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/hw_timestamps.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/task_information.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
class SubmitBlockedParentKernelFixture : public ExecutionModelSchedulerTest,
|
||||
public testing::Test {
|
||||
void SetUp() override {
|
||||
ExecutionModelSchedulerTest::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ExecutionModelSchedulerTest::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFamily> {
|
||||
using BaseClass = DeviceQueueHw<GfxFamily>;
|
||||
|
||||
public:
|
||||
MockDeviceQueueHwWithCriticalSectionRelease(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) : BaseClass(context, device, properties) {}
|
||||
|
||||
bool isEMCriticalSectionFree() override {
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(DeviceQueue::queueBuffer->getUnderlyingBuffer());
|
||||
|
||||
criticalSectioncheckCounter++;
|
||||
|
||||
if (criticalSectioncheckCounter == maxCounter) {
|
||||
igilCmdQueue->m_controls.m_CriticalSection = DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
||||
return true;
|
||||
}
|
||||
|
||||
return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw<GfxFamily>::ExecutionModelCriticalSection::Free;
|
||||
}
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override {
|
||||
indirectStateSetup = true;
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount, isCcsUsed);
|
||||
}
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override {
|
||||
cleanupSectionAdded = true;
|
||||
|
||||
auto hwTimestampT = static_cast<TagNode<HwTimeStamps> *>(hwTimeStamp);
|
||||
|
||||
timestampAddedInCleanupSection = hwTimestampT ? hwTimestampT->tagForCpuAccess : nullptr;
|
||||
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount);
|
||||
}
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override {
|
||||
schedulerDispatched = true;
|
||||
return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed);
|
||||
}
|
||||
|
||||
uint32_t criticalSectioncheckCounter = 0;
|
||||
const uint32_t maxCounter = 10;
|
||||
bool indirectStateSetup = false;
|
||||
bool cleanupSectionAdded = false;
|
||||
bool schedulerDispatched = false;
|
||||
HwTimeStamps *timestampAddedInCleanupSection = nullptr;
|
||||
};
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
mockDevQueue.acquireEMCriticalSection();
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter);
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
// add initial offset of colorCalState
|
||||
dsh->getSpace(DeviceQueue::colorCalcStateSize);
|
||||
|
||||
uint64_t ValueToFillDsh = 5;
|
||||
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
|
||||
|
||||
// Fill Interface Descriptor Data
|
||||
*dshVal = ValueToFillDsh;
|
||||
|
||||
// Move to parent DSH Offset
|
||||
size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t);
|
||||
dsh->getSpace(alignToOffsetDshSize);
|
||||
|
||||
// Fill with pattern
|
||||
dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
|
||||
*dshVal = ValueToFillDsh;
|
||||
|
||||
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
|
||||
|
||||
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
//device queue dsh is not changed
|
||||
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
|
||||
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_TRUE(mockDevQueue.indirectStateSetup);
|
||||
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
auto timestamp = static_cast<TagNode<HwTimeStamps> *>(pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag());
|
||||
cmdComputeKernel->timestamp = timestamp;
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
|
||||
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t heapSize = 20;
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_TRUE(mockDevQueue.schedulerDispatched);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapsWhenParentKernelIsSubmittedThenQueueHeapsAreNotUsed, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
MockCommandQueue cmdQ(context, device, properties, false);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
size_t heapSize = 20;
|
||||
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
|
||||
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
|
||||
auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
|
||||
auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);
|
||||
|
||||
size_t usedSize = 4u;
|
||||
|
||||
queueSsh.getSpace(usedSize);
|
||||
queueDsh.getSpace(usedSize);
|
||||
queueIoh.getSpace(usedSize);
|
||||
|
||||
auto intialSshUsed = queueSsh.getUsed();
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
|
||||
EXPECT_EQ(usedSize, queueDsh.getUsed());
|
||||
EXPECT_EQ(usedSize, queueIoh.getUsed());
|
||||
EXPECT_EQ(intialSshUsed, queueSsh.getUsed());
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||
|
||||
size_t heapSize = 20;
|
||||
|
||||
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
|
||||
size_t sshSize = 1000;
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
|
||||
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
|
||||
dsh->getSpace(mockDevQueue.getDshOffset());
|
||||
|
||||
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize);
|
||||
|
||||
void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();
|
||||
|
||||
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
|
||||
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
blockedCommandData->setHeaps(dsh, ioh, ssh);
|
||||
|
||||
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
PreemptionMode preemptionMode = device->getPreemptionMode();
|
||||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();
|
||||
|
||||
EXPECT_EQ(sshBuffer, newSshBuffer);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
auto parentKernel = MockParentKernel::create(*context);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
|
||||
parentKernel->createReflectionSurface();
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
DispatchInfo dispatchInfo(device, parentKernel, 1, workItems, nullptr, globalOffsets);
|
||||
dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(parentKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_NE(nullptr, blockedCommandsData);
|
||||
EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
|
||||
EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);
|
||||
|
||||
EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
|
||||
EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
|
||||
EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
|
||||
EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
|
||||
}
|
@ -18,7 +18,6 @@ set(IGDRCL_SRCS_tests_fixtures
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_matcher.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/execution_model_kernel_fixture.h
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -11,7 +11,6 @@
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/test/unit_test/api/cl_api_tests.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
@ -54,37 +53,4 @@ class DeviceHostQueueFixture : public ApiFixture<>,
|
||||
cl_queue_properties properties[5] = deviceQueueProperties::noProperties);
|
||||
};
|
||||
|
||||
class DeviceQueueHwTest : public DeviceHostQueueFixture<DeviceQueue> {
|
||||
public:
|
||||
using BaseClass = DeviceHostQueueFixture<DeviceQueue>;
|
||||
void SetUp() override {
|
||||
BaseClass::SetUp();
|
||||
device = castToObject<ClDevice>(testedClDevice);
|
||||
ASSERT_NE(device, nullptr);
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
BaseClass::TearDown();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
DeviceQueueHw<GfxFamily> *castToHwType(DeviceQueue *deviceQueue) {
|
||||
return reinterpret_cast<DeviceQueueHw<GfxFamily> *>(deviceQueue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t getMinimumSlbSize() {
|
||||
return sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
|
||||
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD) +
|
||||
sizeof(typename GfxFamily::PIPE_CONTROL) +
|
||||
sizeof(typename GfxFamily::GPGPU_WALKER) +
|
||||
sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
|
||||
sizeof(typename GfxFamily::PIPE_CONTROL) +
|
||||
DeviceQueueHw<GfxFamily>::getCSPrefetchSize(); // prefetch size
|
||||
}
|
||||
|
||||
DeviceQueue *deviceQueue;
|
||||
ClDevice *device;
|
||||
};
|
||||
} // namespace DeviceHostQueue
|
||||
|
@ -1,17 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
struct DeviceEnqueueSupport {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::deviceEnqueueSupport;
|
||||
}
|
||||
};
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -36,12 +36,10 @@
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/program/create.inl"
|
||||
#include "opencl/test/unit_test/fixtures/context_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
#include "opencl/test/unit_test/program/program_tests.h"
|
||||
@ -1071,161 +1069,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenK
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
HWTEST2_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithDeviceEnqueueIsUsedThenKernelCreateAndSubmitCallbacksAreNotCalled, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice);
|
||||
|
||||
gtpinCallbacks.onContextCreate = OnContextCreate;
|
||||
gtpinCallbacks.onContextDestroy = OnContextDestroy;
|
||||
gtpinCallbacks.onKernelCreate = OnKernelCreate;
|
||||
gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
|
||||
gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
|
||||
gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;
|
||||
retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr);
|
||||
EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin);
|
||||
|
||||
cl_device_id device = (cl_device_id)pDevice;
|
||||
cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(nullptr, context);
|
||||
auto pContext = castToObject<Context>(context);
|
||||
auto rootDeviceIndex = pDevice->getRootDeviceIndex();
|
||||
|
||||
cl_queue_properties devQproperties = 0;
|
||||
auto devQ = std::make_unique<DeviceQueueHw<FamilyType>>(pContext, pDevice, devQproperties);
|
||||
pContext->setDefaultDeviceQueue(devQ.get());
|
||||
|
||||
cl_command_queue cmdQ = nullptr;
|
||||
cl_queue_properties properties = 0;
|
||||
|
||||
cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
|
||||
ASSERT_NE(nullptr, cmdQ);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// Prepare a kernel with fake Execution Environment
|
||||
char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'};
|
||||
size_t binSize = 10;
|
||||
MockProgram *pProgram = Program::createBuiltInFromGenBinary<MockProgram>(pContext, pContext->getDevices(), &binary[0], binSize, &retVal);
|
||||
ASSERT_NE(nullptr, pProgram);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
char *pBin = &binary[0];
|
||||
SProgramBinaryHeader *pBHdr = (SProgramBinaryHeader *)pBin;
|
||||
pBHdr->Magic = iOpenCL::MAGIC_CL;
|
||||
pBHdr->Version = iOpenCL::CURRENT_ICBE_VERSION;
|
||||
pBHdr->Device = pDevice->getHardwareInfo().platform.eRenderCoreFamily;
|
||||
pBHdr->GPUPointerSizeInBytes = 8;
|
||||
pBHdr->NumberOfKernels = 1;
|
||||
pBHdr->SteppingId = 0;
|
||||
pBHdr->PatchListSize = 0;
|
||||
pBin += sizeof(SProgramBinaryHeader);
|
||||
binSize += sizeof(SProgramBinaryHeader);
|
||||
|
||||
SKernelBinaryHeaderCommon *pKHdr = (SKernelBinaryHeaderCommon *)pBin;
|
||||
pKHdr->CheckSum = 0;
|
||||
pKHdr->ShaderHashCode = 0;
|
||||
pKHdr->KernelNameSize = 4;
|
||||
pKHdr->PatchListSize = sizeof(SPatchExecutionEnvironment) + sizeof(SPatchBindingTableState);
|
||||
pKHdr->KernelHeapSize = 16;
|
||||
pKHdr->GeneralStateHeapSize = 0;
|
||||
pKHdr->DynamicStateHeapSize = 0;
|
||||
pKHdr->SurfaceStateHeapSize = 64;
|
||||
pKHdr->KernelUnpaddedSize = 0;
|
||||
pBin += sizeof(SKernelBinaryHeaderCommon);
|
||||
binSize += sizeof(SKernelBinaryHeaderCommon);
|
||||
char *pKernelBin = pBin;
|
||||
|
||||
strcpy(pBin, "Tst");
|
||||
pBin += pKHdr->KernelNameSize;
|
||||
binSize += pKHdr->KernelNameSize;
|
||||
|
||||
strcpy(pBin, "fake_ISA_code__");
|
||||
pBin += pKHdr->KernelHeapSize;
|
||||
binSize += pKHdr->KernelHeapSize;
|
||||
|
||||
memset(pBin, 0, pKHdr->SurfaceStateHeapSize);
|
||||
pBin += pKHdr->SurfaceStateHeapSize;
|
||||
binSize += pKHdr->SurfaceStateHeapSize;
|
||||
|
||||
SPatchExecutionEnvironment *pPatch1 = (SPatchExecutionEnvironment *)pBin;
|
||||
pPatch1->Token = iOpenCL::PATCH_TOKEN_EXECUTION_ENVIRONMENT;
|
||||
pPatch1->Size = sizeof(iOpenCL::SPatchExecutionEnvironment);
|
||||
pPatch1->RequiredWorkGroupSizeX = 0;
|
||||
pPatch1->RequiredWorkGroupSizeY = 0;
|
||||
pPatch1->RequiredWorkGroupSizeZ = 0;
|
||||
pPatch1->LargestCompiledSIMDSize = 8;
|
||||
pPatch1->CompiledSubGroupsNumber = 0;
|
||||
pPatch1->HasBarriers = 0;
|
||||
pPatch1->DisableMidThreadPreemption = 0;
|
||||
pPatch1->HasDeviceEnqueue = 1;
|
||||
pPatch1->MayAccessUndeclaredResource = 0;
|
||||
pPatch1->UsesFencesForReadWriteImages = 0;
|
||||
pPatch1->UsesStatelessSpillFill = 0;
|
||||
pPatch1->IsCoherent = 0;
|
||||
pPatch1->IsInitializer = 0;
|
||||
pPatch1->IsFinalizer = 0;
|
||||
pPatch1->SubgroupIndependentForwardProgressRequired = 0;
|
||||
pPatch1->CompiledForGreaterThan4GBBuffers = 0;
|
||||
pBin += sizeof(SPatchExecutionEnvironment);
|
||||
binSize += sizeof(SPatchExecutionEnvironment);
|
||||
|
||||
SPatchBindingTableState *pPatch2 = (SPatchBindingTableState *)pBin;
|
||||
pPatch2->Token = iOpenCL::PATCH_TOKEN_BINDING_TABLE_STATE;
|
||||
pPatch2->Size = sizeof(iOpenCL::SPatchBindingTableState);
|
||||
pPatch2->Offset = 0;
|
||||
pPatch2->Count = 1;
|
||||
pPatch2->SurfaceStateOffset = 0;
|
||||
binSize += sizeof(SPatchBindingTableState);
|
||||
|
||||
uint32_t kernelBinSize =
|
||||
pKHdr->DynamicStateHeapSize +
|
||||
pKHdr->GeneralStateHeapSize +
|
||||
pKHdr->KernelHeapSize +
|
||||
pKHdr->KernelNameSize +
|
||||
pKHdr->PatchListSize +
|
||||
pKHdr->SurfaceStateHeapSize;
|
||||
uint64_t hashValue = Hash::hash(reinterpret_cast<const char *>(pKernelBin), kernelBinSize);
|
||||
pKHdr->CheckSum = static_cast<uint32_t>(hashValue & 0xFFFFFFFF);
|
||||
|
||||
pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(&binary[0], binSize);
|
||||
pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = binSize;
|
||||
retVal = pProgram->processGenBinary(*pDevice);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// Verify that GT-Pin Kernel Create callback is not called
|
||||
int prevCount = KernelCreateCallbackCount;
|
||||
cl_kernel kernel = clCreateKernel(pProgram, "Tst", &retVal);
|
||||
EXPECT_NE(nullptr, kernel);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(prevCount, KernelCreateCallbackCount);
|
||||
|
||||
int prevCount2 = KernelSubmitCallbackCount;
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
size_t localWorkSize[3] = {1, 1, 1};
|
||||
|
||||
MockParentKernel *parentKernel = MockParentKernel::create(*pContext);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex);
|
||||
auto pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos);
|
||||
|
||||
retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(prevCount2, KernelSubmitCallbackCount);
|
||||
|
||||
// Cleanup
|
||||
retVal = clReleaseKernel(kernel);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clReleaseProgram(pProgram);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clReleaseCommandQueue(cmdQ);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = clReleaseContext(context);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) {
|
||||
gtpinCallbacks.onContextCreate = OnContextCreate;
|
||||
gtpinCallbacks.onContextDestroy = OnContextDestroy;
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -21,7 +21,6 @@ set(IGDRCL_SRCS_tests_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_is_patched_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_dev_queue_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_reflection_surface_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_arg_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_tests.cpp
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -15,11 +15,9 @@
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/mem_obj/pipe.h"
|
||||
#include "opencl/test/unit_test/fixtures/context_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/image_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_pipe.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
@ -424,48 +422,6 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
EXPECT_EQ(3, sampler->getRefInternalCount());
|
||||
}
|
||||
|
||||
HWTEST2_F(CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect, DeviceEnqueueSupport) {
|
||||
pKernelInfo->addArgDevQueue(0, 0x20, sizeof(void *));
|
||||
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device1);
|
||||
|
||||
cl_queue_properties queueProps[5] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
|
||||
0, 0, 0};
|
||||
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context.get(), device1, queueProps[0]);
|
||||
auto clDeviceQueue = static_cast<cl_command_queue>(&mockDevQueue);
|
||||
auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
|
||||
|
||||
pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue);
|
||||
pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgDevQueue);
|
||||
|
||||
retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_command_queue), &clDeviceQueue);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
|
||||
EXPECT_EQ(Kernel::DEVICE_QUEUE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
|
||||
auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(static_cast<uintptr_t>(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg);
|
||||
}
|
||||
|
||||
TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
char *svmPtr = new char[256];
|
||||
|
||||
|
@ -1,125 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
using namespace NEO;
|
||||
using namespace DeviceHostQueue;
|
||||
|
||||
struct KernelArgDevQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
DeviceHostQueueFixture<DeviceQueue>::SetUp();
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice);
|
||||
|
||||
pDeviceQueue = createQueueObject();
|
||||
|
||||
pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
pKernelInfo->addArgDevQueue(0, crossThreadOffset, 4);
|
||||
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*pDevice));
|
||||
pKernel = new MockKernel(program.get(), *pKernelInfo, *pDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
uint8_t pCrossThreadData[crossThreadDataSize];
|
||||
memset(pCrossThreadData, crossThreadDataInit, sizeof(pCrossThreadData));
|
||||
pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
delete pKernel;
|
||||
|
||||
delete pDeviceQueue;
|
||||
|
||||
DeviceHostQueueFixture<DeviceQueue>::TearDown();
|
||||
}
|
||||
|
||||
bool crossThreadDataUnchanged() {
|
||||
for (uint32_t i = 0; i < crossThreadDataSize; i++) {
|
||||
if (pKernel->mockCrossThreadData[i] != crossThreadDataInit) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static const uint32_t crossThreadDataSize = 0x10;
|
||||
static const char crossThreadDataInit = 0x7e;
|
||||
const CrossThreadDataOffset crossThreadOffset = 0x4;
|
||||
|
||||
std::unique_ptr<MockProgram> program;
|
||||
DeviceQueue *pDeviceQueue = nullptr;
|
||||
MockKernel *pKernel = nullptr;
|
||||
std::unique_ptr<MockKernelInfo> pKernelInfo;
|
||||
};
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenKernelWithDevQueueArgWhenSettingArgHandleThenCorrectHandleIsSet, DeviceEnqueueSupport) {
|
||||
EXPECT_EQ(pKernel->kernelArgHandlers[0], &Kernel::setArgDevQueue);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenDeviceQueueWhenSettingArgDevQueueThenCorrectlyPatched, DeviceEnqueueSupport) {
|
||||
auto clDeviceQueue = static_cast<cl_command_queue>(pDeviceQueue);
|
||||
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clDeviceQueue);
|
||||
EXPECT_EQ(ret, CL_SUCCESS);
|
||||
|
||||
auto gpuAddress = static_cast<uint32_t>(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch());
|
||||
auto patchLocation = ptrOffset(pKernel->mockCrossThreadData.data(), crossThreadOffset);
|
||||
EXPECT_EQ(*(reinterpret_cast<uint32_t *>(patchLocation)), gpuAddress);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenCommandQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
auto clCmdQueue = static_cast<cl_command_queue>(pCommandQueue);
|
||||
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clCmdQueue);
|
||||
EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE);
|
||||
EXPECT_EQ(crossThreadDataUnchanged(), true);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenNonQueueObjectWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
Buffer *buffer = new MockBuffer();
|
||||
auto clBuffer = static_cast<cl_mem>(buffer);
|
||||
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clBuffer);
|
||||
EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE);
|
||||
EXPECT_EQ(crossThreadDataUnchanged(), true);
|
||||
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenInvalidQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
char *pFakeDeviceQueue = new char[sizeof(DeviceQueue)];
|
||||
auto clFakeDeviceQueue = reinterpret_cast<cl_command_queue *>(pFakeDeviceQueue);
|
||||
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clFakeDeviceQueue);
|
||||
EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE);
|
||||
EXPECT_EQ(crossThreadDataUnchanged(), true);
|
||||
|
||||
delete[] pFakeDeviceQueue;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenNullDeviceQueueWhenSettingArgDevQueueThenInvalidArgValueErrorIsReturned, DeviceEnqueueSupport) {
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), nullptr);
|
||||
EXPECT_EQ(ret, CL_INVALID_ARG_VALUE);
|
||||
EXPECT_EQ(crossThreadDataUnchanged(), true);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelArgDevQueueTest, GivenInvalidSizeWhenSettingArgDevQueueThenInvalidArgSizeErrorIsReturned, DeviceEnqueueSupport) {
|
||||
auto clDeviceQueue = static_cast<cl_command_queue>(pDeviceQueue);
|
||||
|
||||
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue) - 1, &clDeviceQueue);
|
||||
EXPECT_EQ(ret, CL_INVALID_ARG_SIZE);
|
||||
EXPECT_EQ(crossThreadDataUnchanged(), true);
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -19,7 +19,6 @@
|
||||
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
|
||||
#include "opencl/test/unit_test/helpers/gtest_helpers.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
|
||||
#include "opencl/test/unit_test/helpers/gtest_helpers.h"
|
||||
@ -1013,264 +1012,6 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
// setup surface state heap
|
||||
char surfaceStateHeap[0x80];
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(),
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(0u, surfaceAddress);
|
||||
auto surfaceType = surfaceState->getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
// setup surface state heap
|
||||
char surfaceStateHeap[0x80];
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
pKernel->patchEventPool(pDevQueue);
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(),
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), surfaceAddress);
|
||||
auto surfaceType = surfaceState->getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) {
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
uint64_t crossThreadData = 123;
|
||||
|
||||
pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));
|
||||
|
||||
pKernel->patchEventPool(pDevQueue);
|
||||
|
||||
EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData());
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched, DeviceEnqueueSupport) {
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
if (pClDevice->areOcl21FeaturesSupported() == false) {
|
||||
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
|
||||
} else {
|
||||
}
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) {
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
uint64_t crossThreadData = 0;
|
||||
|
||||
pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));
|
||||
|
||||
pKernel->patchEventPool(pDevQueue);
|
||||
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData());
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
// setup surface state heap
|
||||
char surfaceStateHeap[0x80];
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(),
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(0u, surfaceAddress);
|
||||
auto surfaceType = surfaceState->getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
// setup surface state heap
|
||||
char surfaceStateHeap[0x80];
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
pKernel->patchDefaultDeviceQueue(pDevQueue);
|
||||
|
||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(),
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), surfaceAddress);
|
||||
auto surfaceType = surfaceState->getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
uint64_t crossThreadData = 123;
|
||||
|
||||
pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));
|
||||
|
||||
pKernel->patchDefaultDeviceQueue(pDevQueue);
|
||||
|
||||
EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData());
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 0);
|
||||
|
||||
// create kernel
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);
|
||||
|
||||
uint64_t crossThreadData = 0;
|
||||
|
||||
pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));
|
||||
|
||||
pKernel->patchDefaultDeviceQueue(pDevQueue);
|
||||
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData());
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
typedef Test<ClDeviceFixture> KernelResidencyTest;
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIsMadeResident) {
|
||||
@ -3052,64 +2793,6 @@ TEST(KernelTest, givenKernelWhenSettingAdditinalKernelExecInfoThenCorrectValueIs
|
||||
EXPECT_EQ(AdditionalKernelExecInfo::NotApplicable, mockKernel.getAdditionalKernelExecInfo());
|
||||
}
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class DeviceQueueHwMock : public DeviceQueueHw<GfxFamily> {
|
||||
using BaseClass = DeviceQueueHw<GfxFamily>;
|
||||
|
||||
public:
|
||||
using BaseClass::buildSlbDummyCommands;
|
||||
using BaseClass::getCSPrefetchSize;
|
||||
using BaseClass::getExecutionModelCleanupSectionSize;
|
||||
using BaseClass::getMediaStateClearCmdsSize;
|
||||
using BaseClass::getMinimumSlbSize;
|
||||
using BaseClass::getProfilingEndCmdsSize;
|
||||
using BaseClass::getSlbCS;
|
||||
using BaseClass::getWaCommandsSize;
|
||||
using BaseClass::offsetDsh;
|
||||
|
||||
DeviceQueueHwMock(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) {
|
||||
auto slb = this->getSlbBuffer();
|
||||
LinearStream *slbCS = getSlbCS();
|
||||
slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset
|
||||
};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
HWTEST2_F(DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue, DeviceEnqueueSupport) {
|
||||
std::unique_ptr<DeviceQueueHwMock<FamilyType>> mockDeviceQueueHw(new DeviceQueueHwMock<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
auto slb = mockDeviceQueueHw->getSlbBuffer();
|
||||
auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize();
|
||||
auto slbCopy = malloc(slb->getUnderlyingBufferSize());
|
||||
memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize());
|
||||
memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize());
|
||||
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
// slbEndOffset < commandsSize * 128
|
||||
// always fill only 1 enqueue (after offset)
|
||||
auto offset = static_cast<int>(commandsSize) * 50;
|
||||
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset;
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset
|
||||
EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset),
|
||||
slbCopy, commandsSize)); // change 1 enqueue
|
||||
EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize),
|
||||
slbCopy, offset)); // dont touch memory after (offset + 1 enqueue)
|
||||
|
||||
// slbEndOffset == commandsSize * 128
|
||||
// dont fill commands
|
||||
memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize());
|
||||
offset = static_cast<int>(commandsSize) * 128;
|
||||
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast<int>(commandsSize);
|
||||
mockDeviceQueueHw->resetDeviceQueue();
|
||||
EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues
|
||||
|
||||
free(slbCopy);
|
||||
}
|
||||
|
||||
using KernelMultiRootDeviceTest = MultiRootDeviceFixture;
|
||||
|
||||
TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThenPrivateSurfacesHaveCorrectRootDeviceIndex) {
|
||||
|
@ -16,7 +16,6 @@ set(IGDRCL_SRCS_tests_mocks
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_command_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_context.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_device_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info_ocl.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_image.h
|
||||
|
@ -1,179 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
class MockDeviceQueueHw : public DeviceQueueHw<GfxFamily> {
|
||||
using BaseClass = DeviceQueueHw<GfxFamily>;
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
|
||||
using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
public:
|
||||
using BaseClass::addArbCheckCmdWa;
|
||||
using BaseClass::addLriCmd;
|
||||
using BaseClass::addLriCmdWa;
|
||||
using BaseClass::addMediaStateClearCmds;
|
||||
using BaseClass::addMiAtomicCmdWa;
|
||||
using BaseClass::addPipeControlCmdWa;
|
||||
using BaseClass::addProfilingEndCmds;
|
||||
using BaseClass::buildSlbDummyCommands;
|
||||
using BaseClass::getBlockKernelStartPointer;
|
||||
using BaseClass::getCSPrefetchSize;
|
||||
using BaseClass::getExecutionModelCleanupSectionSize;
|
||||
using BaseClass::getMediaStateClearCmdsSize;
|
||||
using BaseClass::getMinimumSlbSize;
|
||||
using BaseClass::getProfilingEndCmdsSize;
|
||||
using BaseClass::getSlbCS;
|
||||
using BaseClass::getWaCommandsSize;
|
||||
using BaseClass::offsetDsh;
|
||||
|
||||
bool arbCheckWa;
|
||||
bool miAtomicWa;
|
||||
bool lriWa;
|
||||
bool pipeControlWa;
|
||||
|
||||
struct ExpectedCmds {
|
||||
MEDIA_STATE_FLUSH mediaStateFlush;
|
||||
MI_ARB_CHECK arbCheck;
|
||||
MI_ATOMIC miAtomic;
|
||||
MEDIA_INTERFACE_DESCRIPTOR_LOAD mediaIdLoad;
|
||||
MI_LOAD_REGISTER_IMM lriTrue;
|
||||
MI_LOAD_REGISTER_IMM lriFalse;
|
||||
PIPE_CONTROL pipeControl;
|
||||
PIPE_CONTROL noopedPipeControl;
|
||||
GPGPU_WALKER gpgpuWalker;
|
||||
uint8_t *prefetch;
|
||||
MI_BATCH_BUFFER_START bbStart;
|
||||
} expectedCmds;
|
||||
|
||||
MockDeviceQueueHw(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) : BaseClass(context, device, properties) {
|
||||
auto slb = this->getSlbBuffer();
|
||||
LinearStream *slbCS = getSlbCS();
|
||||
slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize());
|
||||
size_t size = slbCS->getUsed();
|
||||
|
||||
lriWa = false;
|
||||
addLriCmdWa(true);
|
||||
if (slbCS->getUsed() > size) {
|
||||
size = slbCS->getUsed();
|
||||
lriWa = true;
|
||||
}
|
||||
pipeControlWa = false;
|
||||
addPipeControlCmdWa();
|
||||
if (slbCS->getUsed() > size) {
|
||||
size = slbCS->getUsed();
|
||||
pipeControlWa = true;
|
||||
}
|
||||
arbCheckWa = false;
|
||||
addArbCheckCmdWa();
|
||||
if (slbCS->getUsed() > size) {
|
||||
size = slbCS->getUsed();
|
||||
arbCheckWa = true;
|
||||
}
|
||||
miAtomicWa = false;
|
||||
addMiAtomicCmdWa(0);
|
||||
if (slbCS->getUsed() > size) {
|
||||
size = slbCS->getUsed();
|
||||
miAtomicWa = true;
|
||||
}
|
||||
slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset
|
||||
|
||||
setupExpectedCmds();
|
||||
};
|
||||
|
||||
~MockDeviceQueueHw() override {
|
||||
if (expectedCmds.prefetch)
|
||||
delete expectedCmds.prefetch;
|
||||
}
|
||||
|
||||
MI_ATOMIC getExpectedMiAtomicCmd() {
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(this->queueBuffer->getUnderlyingBuffer());
|
||||
auto placeholder = (uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder;
|
||||
|
||||
MI_ATOMIC miAtomic = GfxFamily::cmdInitAtomic;
|
||||
EncodeAtomic<GfxFamily>::programMiAtomic(&miAtomic,
|
||||
placeholder,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD,
|
||||
0x1u, 0x1u, 0x0u, 0x0u);
|
||||
return miAtomic;
|
||||
}
|
||||
|
||||
MI_LOAD_REGISTER_IMM getExpectedLriCmd(bool arbCheck) {
|
||||
MI_LOAD_REGISTER_IMM lri = GfxFamily::cmdInitLoadRegisterImm;
|
||||
lri.setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset
|
||||
if (arbCheck)
|
||||
lri.setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only)
|
||||
else
|
||||
lri.setDataDword(0x0); // default value
|
||||
|
||||
return lri;
|
||||
}
|
||||
|
||||
PIPE_CONTROL getExpectedPipeControlCmd() {
|
||||
PIPE_CONTROL pc;
|
||||
this->initPipeControl(&pc);
|
||||
return pc;
|
||||
}
|
||||
|
||||
MI_ARB_CHECK getExpectedArbCheckCmd() {
|
||||
return GfxFamily::cmdInitArbCheck;
|
||||
}
|
||||
|
||||
void setupExpectedCmds() {
|
||||
expectedCmds.mediaStateFlush = GfxFamily::cmdInitMediaStateFlush;
|
||||
expectedCmds.arbCheck = getExpectedArbCheckCmd();
|
||||
expectedCmds.miAtomic = getExpectedMiAtomicCmd();
|
||||
expectedCmds.mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad;
|
||||
expectedCmds.mediaIdLoad.setInterfaceDescriptorTotalLength(2048);
|
||||
|
||||
auto dataStartAddress = DeviceQueue::colorCalcStateSize;
|
||||
|
||||
// add shift to second table ( 62 index of first ID table with scheduler )
|
||||
dataStartAddress += sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::schedulerIDIndex;
|
||||
|
||||
expectedCmds.mediaIdLoad.setInterfaceDescriptorDataStartAddress(dataStartAddress);
|
||||
expectedCmds.lriTrue = getExpectedLriCmd(true);
|
||||
expectedCmds.lriFalse = getExpectedLriCmd(false);
|
||||
expectedCmds.pipeControl = getExpectedPipeControlCmd();
|
||||
memset(&expectedCmds.noopedPipeControl, 0x0, sizeof(PIPE_CONTROL));
|
||||
expectedCmds.gpgpuWalker = GfxFamily::cmdInitGpgpuWalker;
|
||||
expectedCmds.gpgpuWalker.setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16);
|
||||
expectedCmds.gpgpuWalker.setThreadGroupIdXDimension(1);
|
||||
expectedCmds.gpgpuWalker.setThreadGroupIdYDimension(1);
|
||||
expectedCmds.gpgpuWalker.setThreadGroupIdZDimension(1);
|
||||
expectedCmds.gpgpuWalker.setRightExecutionMask(0xFFFFFFFF);
|
||||
expectedCmds.gpgpuWalker.setBottomExecutionMask(0xFFFFFFFF);
|
||||
expectedCmds.prefetch = new uint8_t[DeviceQueueHw<GfxFamily>::getCSPrefetchSize()];
|
||||
memset(expectedCmds.prefetch, 0x0, DeviceQueueHw<GfxFamily>::getCSPrefetchSize());
|
||||
expectedCmds.bbStart = GfxFamily::cmdInitBatchBufferStart;
|
||||
auto slbPtr = reinterpret_cast<uintptr_t>(this->getSlbBuffer()->getUnderlyingBuffer());
|
||||
expectedCmds.bbStart.setBatchBufferStartAddress(slbPtr);
|
||||
}
|
||||
|
||||
IGIL_CommandQueue *getIgilQueue() {
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(DeviceQueue::queueBuffer->getUnderlyingBuffer());
|
||||
return igilCmdQueue;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
@ -1,12 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
set(IGDRCL_SRCS_mt_tests_device_queue
|
||||
# local files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_mt_tests.cpp
|
||||
)
|
||||
target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_device_queue})
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
typedef ::testing::Test DeviceQueueHwMtTest;
|
||||
|
||||
HWTEST2_F(DeviceQueueHwMtTest, givenTakenIgilCriticalSectionWhenSecondThreadIsWaitingThenDontHang, DeviceEnqueueSupport) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto context = std::unique_ptr<MockContext>(new MockContext());
|
||||
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context.get(), device.get(), properties[0]);
|
||||
|
||||
auto igilCmdQueue = mockDevQueue.getIgilQueue();
|
||||
auto igilCriticalSection = const_cast<volatile uint *>(&igilCmdQueue->m_controls.m_CriticalSection);
|
||||
*igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Taken;
|
||||
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
|
||||
|
||||
std::mutex mtx;
|
||||
|
||||
auto thread = std::thread([&] {
|
||||
std::unique_lock<std::mutex> inThreadLock(mtx);
|
||||
while (!mockDevQueue.isEMCriticalSectionFree()) {
|
||||
inThreadLock.unlock();
|
||||
inThreadLock.lock();
|
||||
}
|
||||
});
|
||||
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
*igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Free;
|
||||
lock.unlock();
|
||||
|
||||
thread.join();
|
||||
EXPECT_TRUE(mockDevQueue.isEMCriticalSectionFree());
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -7,8 +7,5 @@
|
||||
set(IGDRCL_SRCS_tests_scheduler
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.inl
|
||||
)
|
||||
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scheduler})
|
||||
|
@ -1,244 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/test/unit_test/scheduler/scheduler_source_tests.h"
|
||||
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
|
||||
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "hw_cmds.h"
|
||||
// Keep this include after execution_model_fixture.h otherwise there is high chance of conflict with macros
|
||||
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
using namespace NEO;
|
||||
using namespace BuiltinKernelsSimulation;
|
||||
|
||||
HWTEST2_F(SchedulerSourceTest, WhenEnqueingThenGpgpuWalkerIsPatchedCorrectly, DeviceEnqueueSupport) {
|
||||
using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
size_t msfOffset = 0;
|
||||
size_t miArbCheckOffset = 0;
|
||||
size_t miAtomicOffset = 0;
|
||||
size_t mediaIDLoadOffset = 0;
|
||||
size_t miLoadRegOffset = 0;
|
||||
size_t pipeControlOffset = 0;
|
||||
size_t gpgpuOffset = 0;
|
||||
size_t msfOffset2 = 0;
|
||||
size_t miArbCheckOffset2 = 0;
|
||||
|
||||
size_t msfOffsetAfter = 0;
|
||||
size_t miArbCheckOffsetAfter = 0;
|
||||
size_t miAtomicOffsetAfter = 0;
|
||||
size_t mediaIDLoadOffsetAfter = 0;
|
||||
size_t miLoadRegOffsetAfter = 0;
|
||||
size_t pipeControlOffsetAfter = 0;
|
||||
size_t gpgpuOffsetAfter = 0;
|
||||
size_t msfOffsetAfter2 = 0;
|
||||
size_t miArbCheckOffsetAfter2 = 0;
|
||||
|
||||
auto pDevQueueHw = new MockDeviceQueueHw<FamilyType>(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);
|
||||
|
||||
// Prepopulate SLB with commands
|
||||
pDevQueueHw->buildSlbDummyCommands();
|
||||
LinearStream *slb = pDevQueueHw->getSlbCS();
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(*slb, 0);
|
||||
|
||||
// Parse commands and save offsets of first enqueue space
|
||||
auto itorMediaStateFlush = find<MEDIA_STATE_FLUSH *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush;
|
||||
|
||||
EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf);
|
||||
|
||||
auto itorArbCheck = find<MI_ARB_CHECK *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr;
|
||||
|
||||
auto itorMiAtomic = find<MI_ATOMIC *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr;
|
||||
|
||||
auto itorIDLoad = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr;
|
||||
|
||||
auto itorMiLoadReg = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr;
|
||||
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr;
|
||||
|
||||
auto itorWalker = find<GPGPU_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
auto *walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr;
|
||||
|
||||
auto itorMediaStateFlush2 = find<MEDIA_STATE_FLUSH *>(itorWalker, hwParser.cmdList.end());
|
||||
auto *msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr;
|
||||
|
||||
auto itorArbCheck2 = find<MI_ARB_CHECK *>(itorWalker, hwParser.cmdList.end());
|
||||
auto *arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr;
|
||||
|
||||
if (msf)
|
||||
msfOffset = ptrDiff(msf, slb->getCpuBase());
|
||||
|
||||
if (arbCheck)
|
||||
miArbCheckOffset = ptrDiff(arbCheck, slb->getCpuBase());
|
||||
|
||||
if (miAtomic)
|
||||
miAtomicOffset = ptrDiff(miAtomic, slb->getCpuBase());
|
||||
|
||||
if (idLoad)
|
||||
mediaIDLoadOffset = ptrDiff(idLoad, slb->getCpuBase());
|
||||
|
||||
if (miLoadReg)
|
||||
miLoadRegOffset = ptrDiff(miLoadReg, slb->getCpuBase());
|
||||
|
||||
if (pipeControl)
|
||||
pipeControlOffset = ptrDiff(pipeControl, slb->getCpuBase());
|
||||
|
||||
if (walker)
|
||||
gpgpuOffset = ptrDiff(walker, slb->getCpuBase());
|
||||
|
||||
if (msf2)
|
||||
msfOffset2 = ptrDiff(msf2, slb->getCpuBase());
|
||||
|
||||
if (arbCheck2)
|
||||
miArbCheckOffset2 = ptrDiff(arbCheck2, slb->getCpuBase());
|
||||
|
||||
uint32_t *slbBuffer = (uint32_t *)slb->getCpuBase();
|
||||
uint32_t secondLevelBatchOffset = 0;
|
||||
uint32_t InterfaceDescriptorOffset = 3;
|
||||
uint32_t SIMDSize = 16;
|
||||
uint32_t TotalLocalWorkSize = 24;
|
||||
uint3 DimSize = {6, 4, 1};
|
||||
uint3 StartPoint = {4, 4, 0};
|
||||
uint32_t NumberOfHWThreadsPerWG = 3;
|
||||
uint32_t IndirectPayloadSize = 10;
|
||||
uint32_t IOHoffset = 256;
|
||||
|
||||
SchedulerSimulation<FamilyType>::patchGpGpuWalker(secondLevelBatchOffset, slbBuffer, InterfaceDescriptorOffset, SIMDSize, TotalLocalWorkSize, DimSize, StartPoint, NumberOfHWThreadsPerWG, IndirectPayloadSize, IOHoffset);
|
||||
|
||||
size_t commandsSize = pDevQueueHw->getMinimumSlbSize() + pDevQueueHw->getWaCommandsSize();
|
||||
|
||||
// Parse again
|
||||
LinearStream slbTested(slbBuffer, commandsSize);
|
||||
hwParser.cmdList.clear();
|
||||
slbTested.getSpace(commandsSize);
|
||||
hwParser.parseCommands<FamilyType>(slbTested, 0);
|
||||
|
||||
itorMediaStateFlush = find<MEDIA_STATE_FLUSH *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush;
|
||||
|
||||
EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf);
|
||||
|
||||
itorArbCheck = find<MI_ARB_CHECK *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr;
|
||||
|
||||
itorMiAtomic = find<MI_ATOMIC *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr;
|
||||
|
||||
itorIDLoad = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr;
|
||||
|
||||
itorMiLoadReg = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr;
|
||||
|
||||
itorPipeControl = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr;
|
||||
|
||||
itorWalker = find<GPGPU_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr;
|
||||
|
||||
itorMediaStateFlush2 = find<MEDIA_STATE_FLUSH *>(itorWalker, hwParser.cmdList.end());
|
||||
msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr;
|
||||
|
||||
itorArbCheck2 = find<MI_ARB_CHECK *>(itorWalker, hwParser.cmdList.end());
|
||||
arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr;
|
||||
|
||||
if (msf)
|
||||
msfOffsetAfter = ptrDiff(msf, slbTested.getCpuBase());
|
||||
|
||||
if (arbCheck)
|
||||
miArbCheckOffsetAfter = ptrDiff(arbCheck, slbTested.getCpuBase());
|
||||
|
||||
if (miAtomic)
|
||||
miAtomicOffsetAfter = ptrDiff(miAtomic, slbTested.getCpuBase());
|
||||
|
||||
if (idLoad)
|
||||
mediaIDLoadOffsetAfter = ptrDiff(idLoad, slbTested.getCpuBase());
|
||||
|
||||
if (miLoadReg)
|
||||
miLoadRegOffsetAfter = ptrDiff(miLoadReg, slbTested.getCpuBase());
|
||||
|
||||
if (pipeControl)
|
||||
pipeControlOffsetAfter = ptrDiff(pipeControl, slbTested.getCpuBase());
|
||||
|
||||
if (walker)
|
||||
gpgpuOffsetAfter = ptrDiff(walker, slbTested.getCpuBase());
|
||||
|
||||
if (msf2)
|
||||
msfOffsetAfter2 = ptrDiff(msf2, slbTested.getCpuBase());
|
||||
|
||||
if (arbCheck2)
|
||||
miArbCheckOffsetAfter2 = ptrDiff(arbCheck2, slbTested.getCpuBase());
|
||||
|
||||
EXPECT_EQ(msfOffset, msfOffsetAfter);
|
||||
EXPECT_EQ(miArbCheckOffset, miArbCheckOffsetAfter);
|
||||
EXPECT_EQ(miAtomicOffset, miAtomicOffsetAfter);
|
||||
EXPECT_EQ(mediaIDLoadOffset, mediaIDLoadOffsetAfter);
|
||||
EXPECT_EQ(miLoadRegOffset, miLoadRegOffsetAfter);
|
||||
EXPECT_EQ(pipeControlOffset, pipeControlOffsetAfter);
|
||||
EXPECT_EQ(gpgpuOffset, gpgpuOffsetAfter);
|
||||
EXPECT_EQ(msfOffset2, msfOffsetAfter2);
|
||||
EXPECT_EQ(miArbCheckOffset2, miArbCheckOffsetAfter2);
|
||||
|
||||
if (walker) {
|
||||
EXPECT_EQ(InterfaceDescriptorOffset, walker->getInterfaceDescriptorOffset());
|
||||
EXPECT_EQ(NumberOfHWThreadsPerWG, walker->getThreadWidthCounterMaximum());
|
||||
|
||||
EXPECT_EQ(16u, SIMDSize);
|
||||
typename GPGPU_WALKER::SIMD_SIZE simd = GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16;
|
||||
EXPECT_EQ(simd, walker->getSimdSize());
|
||||
|
||||
EXPECT_EQ(StartPoint.x, walker->getThreadGroupIdStartingX());
|
||||
EXPECT_EQ(StartPoint.y, walker->getThreadGroupIdStartingY());
|
||||
//EXPECT_EQ(StartPoint.z, walker->GetThreadGroupIdStartingZ());
|
||||
|
||||
EXPECT_EQ(DimSize.x, walker->getThreadGroupIdXDimension());
|
||||
EXPECT_EQ(DimSize.y, walker->getThreadGroupIdYDimension());
|
||||
//EXPECT_EQ(DimSize.z, walker->getThreadGroupIdZDimension());
|
||||
|
||||
uint32_t mask = static_cast<uint32_t>(maxNBitValue(TotalLocalWorkSize % SIMDSize));
|
||||
if (mask == 0)
|
||||
mask = ~0;
|
||||
uint32_t yMask = 0xffffffff;
|
||||
|
||||
EXPECT_EQ(mask, walker->getRightExecutionMask());
|
||||
EXPECT_EQ(yMask, walker->getBottomExecutionMask());
|
||||
|
||||
EXPECT_EQ(IndirectPayloadSize, walker->getIndirectDataLength());
|
||||
|
||||
EXPECT_EQ(IOHoffset, walker->getIndirectDataStartAddress());
|
||||
} else {
|
||||
EXPECT_TRUE(false) << "GPGPU_WALKER commandnot found, patchGpGpuWalker could have corrupted prepopulated commands\n";
|
||||
}
|
||||
|
||||
delete pDevQueueHw;
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class SchedulerSourceTest : public testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
pDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)};
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice);
|
||||
}
|
||||
void TearDown() override {
|
||||
delete pDevice;
|
||||
}
|
||||
|
||||
MockClDevice *pDevice;
|
||||
MockContext context;
|
||||
|
||||
template <typename GfxFamily>
|
||||
void givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest();
|
||||
template <typename GfxFamily>
|
||||
void givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest();
|
||||
template <typename GfxFamily>
|
||||
void givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest();
|
||||
};
|
||||
|
||||
} // namespace NEO
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSourceTest::givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest() {
|
||||
auto devQueueHw = std::unique_ptr<MockDeviceQueueHw<GfxFamily>>(new MockDeviceQueueHw<GfxFamily>(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]));
|
||||
|
||||
auto singleEnqueueSpace = devQueueHw->getMinimumSlbSize() + devQueueHw->getWaCommandsSize();
|
||||
EXPECT_EQ(singleEnqueueSpace, SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSourceTest::givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest() {
|
||||
auto devQueueHw = std::unique_ptr<MockDeviceQueueHw<GfxFamily>>(new MockDeviceQueueHw<GfxFamily>(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]));
|
||||
devQueueHw->buildSlbDummyCommands();
|
||||
|
||||
auto slbCS = devQueueHw->getSlbCS();
|
||||
auto usedSpace = slbCS->getUsed();
|
||||
|
||||
auto spaceRequiredForEnqueuesAndBBStart = SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
|
||||
EXPECT_EQ(usedSpace, spaceRequiredForEnqueuesAndBBStart);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSourceTest::givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest() {
|
||||
EXPECT_EQ(DeviceQueue::numberOfDeviceEnqueues, static_cast<uint32_t>(SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES));
|
||||
}
|
Reference in New Issue
Block a user