Disable deviceEnqueue for BDW

Related-To: NEO-6378

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2021-10-26 16:15:57 +00:00 committed by Compute-Runtime-Automation
parent 4bd31de0f4
commit bdf5a1c39c
72 changed files with 351 additions and 1678 deletions

View File

@ -10,6 +10,8 @@ SET_FLAGS_FOR("GEN11" "ICLLP" "LKF" "EHL")
SET_FLAGS_FOR("GEN12LP" "TGLLP" "RKL" "ADLS" "ADLP" "DG1")
SET_FLAGS_FOR("XE_HP_CORE" "XE_HP_SDV")
# Gens whose device-side enqueue support is switched off: GEN8, GEN12LP and every
# entry of XEHP_AND_LATER_GENS. The list is consumed by the foreach further down
# that sets SUPPORT_DEVICE_ENQUEUE_<GEN> to FALSE.
set(DEVICE_ENQUEUE_DISABLED_GENS "GEN8" "GEN12LP" ${XEHP_AND_LATER_GENS})
foreach(GEN_TYPE ${XEHP_AND_LATER_GENS})
if(TESTS_${GEN_TYPE})
set(TESTS_XEHP_AND_LATER 1)
@ -17,6 +19,9 @@ foreach(GEN_TYPE ${XEHP_AND_LATER_GENS})
if(SUPPORT_${GEN_TYPE})
set(SUPPORT_XEHP_AND_LATER 1)
endif()
endforeach()
# For every gen in DEVICE_ENQUEUE_DISABLED_GENS, write SUPPORT_DEVICE_ENQUEUE_<GEN>=FALSE
# into the cache. FORCE overwrites any value already stored in the cache for that entry.
foreach(GEN_TYPE ${DEVICE_ENQUEUE_DISABLED_GENS})
set(SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE} FALSE CACHE BOOL "Disabled support ${GEN_TYPE} for device side enqueue" FORCE)
endforeach()

View File

@ -1,93 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl"
#include "opencl/source/execution_model/device_enqueue.h"
#include "CL/cl.h"
#include "hw_cmds.h"
#include <type_traits>
using namespace NEO;
using namespace BuiltinKernelsSimulation;
// Wraps the Gen12LP scheduler sources in their own namespace. The sources are pulled in
// textually below, so everything they define (e.g. SchedulerParallel20 and
// patchGpGpuWalker, which are referenced through this namespace later in the file)
// becomes a member of Gen12LPSchedulerSimulation.
namespace Gen12LPSchedulerSimulation {
// Must be defined before the includes below so that they can see it.
// NOTE(review): presumably selects the host-emulation path of the scheduler sources — confirm.
#define SCHEDULER_EMULATION
// Declaration only; the definition is not visible in this block.
uint GetNextPowerof2(uint number);
// Returns defaultProfilingTimerResolution from the default Gen12LP platform's
// capability table, converted to float.
float __intel__getProfilingTimerResolution() {
return static_cast<float>(DEFAULT_GEN12LP_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
}
// Include order matters: the Gen12LP-specific headers come before the generic scheduler source.
#include "opencl/source/gen12lp/device_enqueue.h"
#include "opencl/source/gen12lp/scheduler_builtin_kernel.inl"
#include "opencl/source/scheduler/scheduler.cl"
} // namespace Gen12LPSchedulerSimulation
namespace BuiltinKernelsSimulation {

// Entry point of one simulated scheduler thread for TGLLPFamily.
// Registers the calling thread under `index`, spins until `conditionReady` is set and
// then runs Gen12LPSchedulerSimulation::SchedulerParallel20 on the underlying buffers
// of the given allocations. `debugQueue` may be null; a null debug buffer is forwarded
// in that case.
template <>
void SchedulerSimulation<TGLLPFamily>::startScheduler(uint32_t index,
                                                      GraphicsAllocation *queue,
                                                      GraphicsAllocation *commandsStack,
                                                      GraphicsAllocation *eventsPool,
                                                      GraphicsAllocation *secondaryBatchBuffer,
                                                      GraphicsAllocation *dsh,
                                                      GraphicsAllocation *reflectionSurface,
                                                      GraphicsAllocation *queueStorageBuffer,
                                                      GraphicsAllocation *ssh,
                                                      GraphicsAllocation *debugQueue) {
    const auto callerThreadId = std::this_thread::get_id();
    threadIDToLocalIDmap.insert(std::make_pair(callerThreadId, index));

    // Busy-wait until the simulation signals that the scheduler may start.
    while (!conditionReady) {
    }

    // Reinterpret every allocation's underlying buffer as the type the scheduler expects.
    IGIL_CommandQueue *queueMem = (IGIL_CommandQueue *)queue->getUnderlyingBuffer();
    uint *stackMem = (uint *)commandsStack->getUnderlyingBuffer();
    IGIL_EventPool *eventPoolMem = (IGIL_EventPool *)eventsPool->getUnderlyingBuffer();
    uint *slbMem = (uint *)secondaryBatchBuffer->getUnderlyingBuffer();
    char *dshMem = (char *)dsh->getUnderlyingBuffer();
    IGIL_KernelDataHeader *reflectionMem = (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer();
    uint *queueStorageMem = (uint *)queueStorageBuffer->getUnderlyingBuffer();
    char *sshMem = (char *)ssh->getUnderlyingBuffer();

    DebugDataBuffer *debugMem = nullptr;
    if (debugQueue != nullptr) {
        debugMem = (DebugDataBuffer *)debugQueue->getUnderlyingBuffer();
    }

    Gen12LPSchedulerSimulation::SchedulerParallel20(queueMem,
                                                    stackMem,
                                                    eventPoolMem,
                                                    slbMem,
                                                    dshMem,
                                                    reflectionMem,
                                                    queueStorageMem,
                                                    sshMem,
                                                    debugMem);
}

// Forwards all arguments unchanged to the Gen12LP scheduler's patchGpGpuWalker.
template <>
void SchedulerSimulation<TGLLPFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
                                                        __global uint *secondaryBatchBuffer,
                                                        uint interfaceDescriptorOffset,
                                                        uint simdSize,
                                                        uint totalLocalWorkSize,
                                                        uint3 dimSize,
                                                        uint3 startPoint,
                                                        uint numberOfHwThreadsPerWg,
                                                        uint indirectPayloadSize,
                                                        uint ioHoffset) {
    Gen12LPSchedulerSimulation::patchGpGpuWalker(
        secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset,
        simdSize, totalLocalWorkSize,
        dimSize, startPoint,
        numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset);
}

// Explicit instantiation for TGLLPFamily.
template class SchedulerSimulation<TGLLPFamily>;

} // namespace BuiltinKernelsSimulation

View File

@ -1,91 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen8/hw_cmds.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl"
#include "opencl/source/execution_model/device_enqueue.h"
#include "CL/cl.h"
using namespace NEO;
using namespace BuiltinKernelsSimulation;
// Wraps the Gen8 scheduler sources in their own namespace. The sources are pulled in
// textually below, so everything they define (e.g. SchedulerParallel20 and
// patchGpGpuWalker, which are referenced through this namespace later in the file)
// becomes a member of Gen8SchedulerSimulation.
namespace Gen8SchedulerSimulation {
// Must be defined before the includes below so that they can see it.
// NOTE(review): presumably selects the host-emulation path of the scheduler sources — confirm.
#define SCHEDULER_EMULATION
// Declaration only; the definition is not visible in this block.
uint GetNextPowerof2(uint number);
// Returns defaultProfilingTimerResolution from the default Gen8 platform's
// capability table, converted to float.
float __intel__getProfilingTimerResolution() {
return static_cast<float>(DEFAULT_GEN8_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
}
// Include order matters: the Gen8-specific headers come before the generic scheduler source.
#include "opencl/source/gen8/device_enqueue.h"
#include "opencl/source/gen8/scheduler_builtin_kernel.inl"
#include "opencl/source/scheduler/scheduler.cl"
} // namespace Gen8SchedulerSimulation
namespace BuiltinKernelsSimulation {

// Entry point of one simulated scheduler thread for BDWFamily.
// Registers the calling thread under `index`, spins until `conditionReady` is set and
// then runs Gen8SchedulerSimulation::SchedulerParallel20 on the underlying buffers
// of the given allocations. `debugQueue` may be null; a null debug buffer is forwarded
// in that case.
template <>
void SchedulerSimulation<BDWFamily>::startScheduler(uint32_t index,
                                                    GraphicsAllocation *queue,
                                                    GraphicsAllocation *commandsStack,
                                                    GraphicsAllocation *eventsPool,
                                                    GraphicsAllocation *secondaryBatchBuffer,
                                                    GraphicsAllocation *dsh,
                                                    GraphicsAllocation *reflectionSurface,
                                                    GraphicsAllocation *queueStorageBuffer,
                                                    GraphicsAllocation *ssh,
                                                    GraphicsAllocation *debugQueue) {
    const auto callerThreadId = std::this_thread::get_id();
    threadIDToLocalIDmap.insert(std::make_pair(callerThreadId, index));

    // Busy-wait until the simulation signals that the scheduler may start.
    while (!conditionReady) {
    }

    // Reinterpret every allocation's underlying buffer as the type the scheduler expects.
    IGIL_CommandQueue *queueMem = (IGIL_CommandQueue *)queue->getUnderlyingBuffer();
    uint *stackMem = (uint *)commandsStack->getUnderlyingBuffer();
    IGIL_EventPool *eventPoolMem = (IGIL_EventPool *)eventsPool->getUnderlyingBuffer();
    uint *slbMem = (uint *)secondaryBatchBuffer->getUnderlyingBuffer();
    char *dshMem = (char *)dsh->getUnderlyingBuffer();
    IGIL_KernelDataHeader *reflectionMem = (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer();
    uint *queueStorageMem = (uint *)queueStorageBuffer->getUnderlyingBuffer();
    char *sshMem = (char *)ssh->getUnderlyingBuffer();

    DebugDataBuffer *debugMem = nullptr;
    if (debugQueue != nullptr) {
        debugMem = (DebugDataBuffer *)debugQueue->getUnderlyingBuffer();
    }

    Gen8SchedulerSimulation::SchedulerParallel20(queueMem,
                                                 stackMem,
                                                 eventPoolMem,
                                                 slbMem,
                                                 dshMem,
                                                 reflectionMem,
                                                 queueStorageMem,
                                                 sshMem,
                                                 debugMem);
}

// Forwards all arguments unchanged to the Gen8 scheduler's patchGpGpuWalker.
template <>
void SchedulerSimulation<BDWFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
                                                      __global uint *secondaryBatchBuffer,
                                                      uint interfaceDescriptorOffset,
                                                      uint simdSize,
                                                      uint totalLocalWorkSize,
                                                      uint3 dimSize,
                                                      uint3 startPoint,
                                                      uint numberOfHwThreadsPerWg,
                                                      uint indirectPayloadSize,
                                                      uint ioHoffset) {
    Gen8SchedulerSimulation::patchGpGpuWalker(
        secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset,
        simdSize, totalLocalWorkSize,
        dimSize, startPoint,
        numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset);
}

// Explicit instantiation for BDWFamily.
template class SchedulerSimulation<BDWFamily>;

} // namespace BuiltinKernelsSimulation

View File

@ -13,6 +13,8 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
@ -41,6 +43,8 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl

View File

@ -9,20 +9,6 @@
namespace NEO {
// Runs the CPU-side scheduler simulation for `devQueueHw`, handing it the device
// queue's buffers, the parent kernel's reflection surface and the graphics allocation
// behind this queue's SURFACE_STATE indirect heap.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
    BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;

    // Buffers owned by the device queue.
    auto queueBuffer = devQueueHw.getQueueBuffer();
    auto stackBuffer = devQueueHw.getStackBuffer();
    auto eventPoolBuffer = devQueueHw.getEventPoolBuffer();
    auto slbBuffer = devQueueHw.getSlbBuffer();
    auto dshBuffer = devQueueHw.getDshBuffer();
    auto queueStorageBuffer = devQueueHw.getQueueStorageBuffer();
    auto debugQueue = devQueueHw.getDebugQueue();

    // Surfaces that come from the parent kernel and from this command queue.
    auto reflectionSurface = parentKernel.getKernelReflectionSurface();
    auto &&surfaceStateHeap = this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
    auto sshAllocation = surfaceStateHeap.getGraphicsAllocation();

    simulation.runSchedulerSimulation(queueBuffer,
                                      stackBuffer,
                                      eventPoolBuffer,
                                      slbBuffer,
                                      dshBuffer,
                                      reflectionSurface,
                                      queueStorageBuffer,
                                      sshAllocation,
                                      debugQueue);
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::submitCacheFlush(Surface **surfaces,
size_t numSurfaces,

View File

@ -0,0 +1,15 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/command_queue_hw.h"
namespace NEO {
// Variant of runSchedulerSimulation for builds where device enqueue is disabled:
// the body is intentionally empty, so both arguments are ignored and nothing is simulated.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
}
} // namespace NEO

View File

@ -0,0 +1,25 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/command_queue_hw.h"
namespace NEO {

// Runs the CPU-side scheduler simulation for `devQueueHw`, handing it the device
// queue's buffers, the parent kernel's reflection surface and the graphics allocation
// behind this queue's SURFACE_STATE indirect heap.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
    BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;

    // Buffers owned by the device queue.
    auto queueBuffer = devQueueHw.getQueueBuffer();
    auto stackBuffer = devQueueHw.getStackBuffer();
    auto eventPoolBuffer = devQueueHw.getEventPoolBuffer();
    auto slbBuffer = devQueueHw.getSlbBuffer();
    auto dshBuffer = devQueueHw.getDshBuffer();
    auto queueStorageBuffer = devQueueHw.getQueueStorageBuffer();
    auto debugQueue = devQueueHw.getDebugQueue();

    // Surfaces that come from the parent kernel and from this command queue.
    auto reflectionSurface = parentKernel.getKernelReflectionSurface();
    auto &&surfaceStateHeap = this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
    auto sshAllocation = surfaceStateHeap.getGraphicsAllocation();

    simulation.runSchedulerSimulation(queueBuffer,
                                      stackBuffer,
                                      eventPoolBuffer,
                                      slbBuffer,
                                      dshBuffer,
                                      reflectionSurface,
                                      queueStorageBuffer,
                                      sshAllocation,
                                      debugQueue);
}

} // namespace NEO

View File

@ -9,14 +9,11 @@
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/command_queue/command_queue_hw_base.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/memory_manager/resource_surface.h"
namespace NEO {
// No-op definition of runSchedulerSimulation: the body is empty, so both arguments
// are ignored and nothing is simulated.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
}
template <>
void CommandQueueHw<Family>::submitCacheFlush(Surface **surfaces,
size_t numSurfaces,

View File

@ -58,111 +58,6 @@ inline size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
return localWorkSize;
}
// Programs one dispatch of the scheduler kernel into `commandStream`.
//
// Emitted in order: a pipe control, a media interface descriptor load, the scheduler's
// indirect state and a GPGPU_WALKER. Unless devQueueHw.getSchedulerReturnInstance()
// is 1, a pipe control with dcFlushEnable set and an MI_BATCH_BUFFER_START targeting
// the device queue's SLB buffer are appended as well.
//
// commandStream  - stream that receives all commands.
// devQueueHw     - supplies the scheduler interface descriptor index, the descriptor table
//                  offset/size inputs, the scheduler cross-thread data and the SLB buffer.
// preemptionMode - forwarded to sendIndirectState.
// scheduler      - scheduler kernel; its work-size values are patched here.
// ssh, dsh       - heaps handed to sendIndirectState; dsh's CPU base also backs the
//                  temporary indirect object heap created below.
// isCcsUsed      - forwarded to scheduler.getKernelStartOffset.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
const auto &kernelInfo = scheduler.getKernelInfo();
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
// Leading pipe control, programmed with default-constructed args.
NEO::PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
// The interface descriptor table offset is taken from colorCalcStateSize.
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable;
const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
// Program media interface descriptor load
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
commandStream,
offsetInterfaceDescriptor,
totalInterfaceDescriptorTableSize);
// The table offset is expected to be a multiple of 64.
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
// Determine SIMD size
uint32_t simd = kernelInfo.getMaxSimdSize();
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
// Patch our kernel constants
scheduler.setGlobalWorkOffsetValues(0, 0, 0);
scheduler.setGlobalWorkSizeValues(static_cast<uint32_t>(scheduler.getGws()), 1, 1);
scheduler.setLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setLocalWorkSize2Values(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
// NOTE(review): Y/Z are passed as 0 here, while workGroups[] further down uses 1 for
// the same dimensions — confirm that 0 is intended.
scheduler.setNumWorkGroupsValues(static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0);
scheduler.setWorkDim(1);
// Send our indirect object data
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace());
// Consume curbeOffset from the temporary heap before it is used as IOH.
indirectObjectHeap.getSpace(curbeOffset);
IndirectHeap *ioh = &indirectObjectHeap;
// Program the walker. Invokes execution so all state should already be programmed
// Space is reserved in the stream first; the walker is built in a local copy and
// written into that space once it is fully set up.
auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>();
GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker;
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
*dsh,
*ioh,
*ssh,
scheduler,
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
interfaceDescriptorIndex,
preemptionMode,
&cmdWalker,
nullptr,
true,
devQueueHw.getDevice());
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
// The same zero-filled array is passed for both offset arguments.
size_t globalOffsets[3] = {0, 0, 0};
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes,
simd, 1, true, inlineDataProgrammingRequired, 0u);
*pGpGpuWalkerCmd = cmdWalker;
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false);
// Do not put BB_START only when returning in first Scheduler run
if (devQueueHw.getSchedulerReturnInstance() != 1) {
// Reuses the args object from the leading pipe control, now with dcFlushEnable set.
args.dcFlushEnable = true;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
auto bbStart = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
MI_BATCH_BUFFER_START cmdBbStart = GfxFamily::cmdInitBatchBufferStart;
cmdBbStart.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH);
uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress();
cmdBbStart.setBatchBufferStartAddressGraphicsaddress472(slbAddress);
*bbStart = cmdBbStart;
}
}
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(
LinearStream *cmdStream,

View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/gpgpu_walker.h"
namespace NEO {
// Variant of dispatchScheduler for builds where device enqueue is disabled.
// It programs nothing: every call hits UNRECOVERABLE_IF(true), so reaching this
// function is treated as an unrecoverable error and all parameters are unused.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
UNRECOVERABLE_IF(true);
}
} // namespace NEO

View File

@ -0,0 +1,115 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/gpgpu_walker.h"
namespace NEO {
// Programs one dispatch of the scheduler kernel into `commandStream`.
//
// Emitted in order: a pipe control, a media interface descriptor load, the scheduler's
// indirect state and a GPGPU_WALKER. Unless devQueueHw.getSchedulerReturnInstance()
// is 1, a pipe control with dcFlushEnable set and an MI_BATCH_BUFFER_START targeting
// the device queue's SLB buffer are appended as well.
//
// commandStream  - stream that receives all commands.
// devQueueHw     - supplies the scheduler interface descriptor index, the descriptor table
//                  offset/size inputs, the scheduler cross-thread data and the SLB buffer.
// preemptionMode - forwarded to sendIndirectState.
// scheduler      - scheduler kernel; its work-size values are patched here.
// ssh, dsh       - heaps handed to sendIndirectState; dsh's CPU base also backs the
//                  temporary indirect object heap created below.
// isCcsUsed      - forwarded to scheduler.getKernelStartOffset.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
const auto &kernelInfo = scheduler.getKernelInfo();
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
// Leading pipe control, programmed with default-constructed args.
NEO::PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
// The interface descriptor table offset is taken from colorCalcStateSize.
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable;
const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
// Program media interface descriptor load
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
commandStream,
offsetInterfaceDescriptor,
totalInterfaceDescriptorTableSize);
// The table offset is expected to be a multiple of 64.
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
// Determine SIMD size
uint32_t simd = kernelInfo.getMaxSimdSize();
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
// Patch our kernel constants
scheduler.setGlobalWorkOffsetValues(0, 0, 0);
scheduler.setGlobalWorkSizeValues(static_cast<uint32_t>(scheduler.getGws()), 1, 1);
scheduler.setLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setLocalWorkSize2Values(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
scheduler.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(scheduler.getLws()), 1, 1);
// NOTE(review): Y/Z are passed as 0 here, while workGroups[] further down uses 1 for
// the same dimensions — confirm that 0 is intended.
scheduler.setNumWorkGroupsValues(static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0);
scheduler.setWorkDim(1);
// Send our indirect object data
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace());
// Consume curbeOffset from the temporary heap before it is used as IOH.
indirectObjectHeap.getSpace(curbeOffset);
IndirectHeap *ioh = &indirectObjectHeap;
// Program the walker. Invokes execution so all state should already be programmed
// Space is reserved in the stream first; the walker is built in a local copy and
// written into that space once it is fully set up.
auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd<GPGPU_WALKER>();
GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker;
bool inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(scheduler);
auto kernelUsesLocalIds = HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(scheduler);
HardwareCommandsHelper<GfxFamily>::sendIndirectState(
commandStream,
*dsh,
*ioh,
*ssh,
scheduler,
scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
interfaceDescriptorIndex,
preemptionMode,
&cmdWalker,
nullptr,
true,
devQueueHw.getDevice());
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true);
// The same zero-filled array is passed for both offset arguments.
size_t globalOffsets[3] = {0, 0, 0};
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes,
simd, 1, true, inlineDataProgrammingRequired, 0u);
*pGpGpuWalkerCmd = cmdWalker;
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false);
// Do not put BB_START only when returning in first Scheduler run
if (devQueueHw.getSchedulerReturnInstance() != 1) {
// Reuses the args object from the leading pipe control, now with dcFlushEnable set.
args.dcFlushEnable = true;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
auto bbStart = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
MI_BATCH_BUFFER_START cmdBbStart = GfxFamily::cmdInitBatchBufferStart;
cmdBbStart.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH);
uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress();
cmdBbStart.setBatchBufferStartAddressGraphicsaddress472(slbAddress);
*bbStart = cmdBbStart;
}
}
} // namespace NEO

View File

@ -16,6 +16,7 @@
#include "shared/source/helpers/simd_helper.h"
#include "opencl/source/command_queue/gpgpu_walker_base.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/platform/platform.h"
namespace NEO {
@ -89,18 +90,6 @@ size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
return localWorkSize;
}
// dispatchScheduler definition that programs nothing: every call hits
// UNRECOVERABLE_IF(true), so reaching this function is treated as an unrecoverable
// error and all parameters are unused.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
UNRECOVERABLE_IF(true);
}
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(LinearStream *cmdStream,
WALKER_TYPE *walkerCmd,

View File

@ -10,7 +10,7 @@ set(RUNTIME_SRCS_DEVICE_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE})

View File

@ -10,6 +10,7 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
#include "enqueue_init_dispatch_globals.h"

View File

@ -9,8 +9,8 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw_bdw_and_later.inl"
#include "opencl/source/device_queue/device_queue_hw_profiling.inl"
#include "opencl/source/device_queue/device_queue_hw_skl_and_later.inl"
#include "opencl/source/gen11/device_enqueue.h"
namespace NEO {

View File

@ -8,6 +8,7 @@
#include "shared/source/gen11/hw_info.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@ -9,6 +9,7 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "command_queue_helpers_gen12lp.inl"
#include "enqueue_init_dispatch_globals.h"

View File

@ -1,22 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "../execution_model/device_enqueue.h"
// Uncomment this macro to build "empty" schedulers
//#define WA_DISABLE_SCHEDULERS 1
// Sizes of the GPGPU_WALKER and PIPE_CONTROL device commands, expressed as
// <DWORD count> * sizeof(uint).
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP (15 * sizeof(uint))
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP (6 * sizeof(uint))
// The same two command lengths as plain DWORD counts; keep them in sync with the
// multipliers used above.
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET (6)
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET (15)
// Second-level batch buffer space per enqueue: the sum of the listed command sizes
// (media state flush, media interface descriptor load, pipe control, walker,
// media state flush, pipe control) plus CS_PREFETCH_SIZE.
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN12LP (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP + CS_PREFETCH_SIZE)
// NOTE(review): presumably the number of enqueue slots the second-level buffer is
// sized for — confirm against the users of this macro.
#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN12LP (128)

View File

@ -1,41 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen12lp/hw_cmds.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw_bdw_and_later.inl"
#include "opencl/source/device_queue/device_queue_hw_profiling.inl"
namespace NEO {

using Family = TGLLPFamily;
static auto gfxCore = IGFX_GEN12LP_CORE;

// Registers DeviceQueueHw<Family>::create in the device queue factory table
// under the Gen12LP core index.
template <>
void populateFactoryTable<DeviceQueueHw<Family>>() {
    extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE];
    deviceQueueFactory[gfxCore] = DeviceQueueHw<Family>::create;
}

// Workaround hooks: for this family every hook is a no-op and the reported
// workaround command size is zero.
template <>
size_t DeviceQueueHw<Family>::getWaCommandsSize() {
    return 0;
}

template <>
void DeviceQueueHw<Family>::addArbCheckCmdWa() {
}

template <>
void DeviceQueueHw<Family>::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) {
}

template <>
void DeviceQueueHw<Family>::addLriCmdWa(bool setArbCheck) {
}

template <>
void DeviceQueueHw<Family>::addPipeControlCmdWa(bool isNoopCmd) {
}

// Explicit instantiation for the Gen12LP family.
template class DeviceQueueHw<Family>;

} // namespace NEO

View File

@ -28,7 +28,6 @@ struct EnableOCLGen12LP {
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();

View File

@ -11,6 +11,7 @@
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@ -1,93 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "scheduler_definitions.h"
// Maps an SLM size to its patch value.
// Returns 0 when slMsize is 0; otherwise returns 1..7 for the first limit in the
// doubling series 1*1024, 2*1024, ... 64*1024 that is >= slMsize. Sizes above
// 64*1024 match no limit and also yield 0.
uint GetPatchValueForSLMSize(uint slMsize) {
    // TODO: verify this optimization (unverified, kept from the original):
    // return ( SLMSize == 0 ) ? 0 : max( 33 - clz( ( SLMSize - 1 ) >> 10 ), 7 );
    if (slMsize == 0) {
        return 0;
    }

    uint bucketLimit = 1 * 1024;
    for (uint patchValue = 1; patchValue <= 7; ++patchValue) {
        if (slMsize <= bucketLimit) {
            return patchValue;
        }
        bucketLimit *= 2;
    }

    // Larger than the biggest bucket.
    return 0;
}
// On SKL pipe controls come in pairs, so NOOPing has to cover both of them:
// zeroes 12 consecutive DWORDs of secondaryBatchBuffer, starting at
// dwordOffset + pipeControlOffset.
void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) {
    const uint firstDword = dwordOffset + pipeControlOffset;
    for (uint i = 0; i < 12; ++i) {
        secondaryBatchBuffer[firstDword + i] = 0;
    }
}
// On SKL+ with mid thread preemption two pipe controls are needed instead of one
// whenever a post sync operation is done. Writes two back-to-back 6-DWORD pipe
// controls starting at dwordOffset + pipeControlOffset; each one is
// PIPE_CONTROL_CSTALL_DWORD0, PIPE_CONTROL_CSTALL_DWORD1 and four zero DWORDs.
// NOTE(review): the first is the CS stall and the second is meant for the actual
// timestamp write, yet both are programmed identically here — presumably the
// post-sync fields are patched elsewhere; confirm.
void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) {
    uint cursor = dwordOffset + pipeControlOffset;
    for (uint pipeControl = 0; pipeControl < 2; ++pipeControl) {
        secondaryBatchBuffer[cursor++] = PIPE_CONTROL_CSTALL_DWORD0;
        secondaryBatchBuffer[cursor++] = PIPE_CONTROL_CSTALL_DWORD1;
        for (uint zeroDword = 0; zeroDword < 4; ++zeroDword) {
            secondaryBatchBuffer[cursor++] = 0;
        }
    }
}

View File

@ -1,150 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#ifdef _DUMMY_WA
#endif
//POPULATE DEFINES WITH CORRECT VALUES FOR GEN12LP
#define SCHEDULER_COMPILATION_SIZE 8
#define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP 32
#define NUMBER_OF_INERFACE_DESCRIPTORS 64
#define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2)
#define MAX_WKG_SIZE 448
#define INTERFACE_DESCRIPTOR_TABLE_SIZE_G12LP (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP)
#define SIZEOF_COLOR_CALCULATOR_STATE_G12LP 0xC0
#define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G12LP SIZEOF_COLOR_CALCULATOR_STATE_G12LP
#define OCLRT_SIZEOF_SAMPLER_STATE_G12LP (16)
#define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G12LP
#define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP
#define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G12LP
#define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G12LP
#define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G12LP
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE (SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN12LP)
#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN12LP)
//#define OCLRT_MEDIA_VFE_STATE_OFFSET ( MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD )
#define OCLRT_MEDIA_VFE_STATE_OFFSET (0)
//address is QWORD in size and starts on DWORD 1
#define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1)
// DWORD OFFSET
#define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0
//bits 0-5 of 1st DWORD
#define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1)
#define MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_INITIAL_OFFSET 0
#define MI_ATOMIC_CMD_OFFSET 0
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD)
// DWORD OFFSET of InterfaceDescriptor Length
// bits 0 - 16
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORLENGTH_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 2)
// DWORD OFFSET of Interface Descriptor Start Address
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3)
// DWORD OFFSET of the start-timestamp PIPE_CONTROL: the MEDIA_INTERFACE_DESCRIPTOR_LOAD
// offset advanced by that command's size in DWORDs.
#define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET)
// DWORD indices (and one bit position) inside an interface descriptor entry.
// Note that HWTHREADS_NUMBER and SLMSIZE both live in DWORD 6.
#define INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3
#define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4
#define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5
#define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6
#define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6
#define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9
// Masks and DWORD index used when patching sampler state.
#define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0)
#define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0)
#define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2
// DWORD OFFSET of the GPGPU_WALKER command: the start-timestamp PIPE_CONTROL offset
// advanced by the Gen12LP PIPE_CONTROL size in DWORDs.
#define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET)
// DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER
// bits 0 - 5
#define GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1)
// DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER
// bits 0 - 16
#define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2)
// DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER
#define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3)
// The next four fields share the same DWORD (GPGPU_WALKER_OFFSET + 4) and differ only
// in the bit range noted for each of them.
// DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER
// bits 0 - 5
#define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER
// bits 8 - 13
#define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER
// bits 16 - 21
#define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the SIMD Size for GPGPU_WALKER
// bits 30 - 31
#define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Starting in X pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5)
// DWORD OFFSET of the X Dimension for GPGPU_WALKER
#define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7)
// DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8)
// DWORD OFFSET of the Y Dimension for GPGPU_WALKER
#define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10)
// DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11)
// DWORD OFFSET of the Z Dimension for GPGPU_WALKER
#define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 12)
// DWORD OFFSET of the Right or X Mask for GPGPU_WALKER
#define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13)
// DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER
#define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14)
// DWORD OFFSET of the MEDIA_STATE_FLUSH command: the GPGPU_WALKER offset advanced by the
// Gen12LP walker size in DWORDs.
#define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET)
// bits 0-5 of 1st DWORD of M_S_F command
#define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1)
// DWORD OFFSET of the end-timestamp PIPE_CONTROL, placed after the MEDIA_STATE_FLUSH.
#define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD)
// Alias of the end-timestamp offset (no additional adjustment is applied here).
#define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET)
// Location and encodings of the post-sync operation field inside a PIPE_CONTROL.
#define PIPE_CONTROL_POST_SYNC_DWORD 1
#define PIPE_CONTROL_POST_SYNC_START_BIT 14
#define PIPE_CONTROL_POST_SYNC_END_BIT 15
#define PIPE_CONTROL_GENERATE_TIME_STAMP 3
#define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0
// Location of the address field inside a PIPE_CONTROL and the bit ranges of its low/high parts.
#define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2
#define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2
#define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2
#define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31
#define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0
#define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15
// Pre-encoded first two DWORDs for the PIPE_CONTROL variants named below.
// All three share DWORD0; only DWORD1 differs between them.
#define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004
#define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4
#define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004
#define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4
#define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004
#define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4
// the value of g_cInitMiBatchBufferStartCmdG12 DWORD0
#define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101)
// Only defined when the LRI workaround commands are present. The offset skips two
// Gen12LP PIPE_CONTROL-sized slots past the end-timestamp offset.
// NOTE(review): presumably because Gen12LP emits two PIPE_CONTROLs there - confirm.
#if defined WA_LRI_COMMANDS_EXIST
#define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET)
#endif
// Pre-encoded MI_LOAD_REGISTER_IMM header, the register/value pairs used for the
// preemption control register, and the DWORD positions of address and value in the command.
#define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001
#define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248
#define CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100
#define CTXT_PREMP_DEFAULT_VALUE 0x0
#define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1
#define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2

View File

@ -10,6 +10,7 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
#include "enqueue_init_dispatch_globals.h"

View File

@ -1,37 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "../execution_model/device_enqueue.h"
// Workaround switches enabled for Gen8.
#define WA_LRI_COMMANDS_EXIST_GEN8 1
#define WA_PROFILING_PREEMPTION 1
#define WA_SCHEDULER_PREEMPTION 1
#define WA_KERNEL_PREEMPTION 1
// Map the Gen8-specific switch onto the generic name used by shared code.
#if defined WA_LRI_COMMANDS_EXIST_GEN8
#define WA_LRI_COMMANDS_EXIST 1
#endif
// Command sizes in bytes (DWORD count * sizeof(uint)).
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 (15 * sizeof(uint))
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 (6 * sizeof(uint))
#define OCLRT_LOAD_REGISTER_IMM_CMD_G8 (3 * sizeof(uint))
// The same walker / PIPE_CONTROL sizes expressed in DWORDs.
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8_DWORD_OFFSET (15)
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET (6)
// Space reserved in the second level buffer for a single enqueue: the sum of the sizes
// of every command emitted per enqueue plus CS_PREFETCH_SIZE.
// With the LRI workaround the sum contains two MI_LOAD_REGISTER_IMM commands and no
// MEDIA_VFE_STATE; without it the sum contains MEDIA_VFE_STATE and no LRI commands.
#ifdef WA_LRI_COMMANDS_EXIST_GEN8
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8_AND_LATER (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ATOMIC_CMD + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_LOAD_REGISTER_IMM_CMD_G8 + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_LOAD_REGISTER_IMM_CMD_G8 + CS_PREFETCH_SIZE)
#else
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8_AND_LATER (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ATOMIC_CMD + OCLRT_SIZEOF_MEDIA_VFE_STATE_CMD + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + CS_PREFETCH_SIZE)
#endif //WA_LRI_COMMANDS_EXIST_GEN8
// Number of per-enqueue slots reserved in the second level buffer.
#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN8_AND_LATER (128)

View File

@ -1,80 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/gen8/hw_cmds.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw_bdw_and_later.inl"
namespace NEO {

// Hardware family and core id targeted by every specialization in this file.
using Family = BDWFamily;
static auto gfxCore = IGFX_GEN8_CORE;

// Registers the Gen8 DeviceQueueHw creation function in the global factory table,
// indexed by the Gen8 core id.
template <>
void populateFactoryTable<DeviceQueueHw<Family>>() {
    extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE];
    deviceQueueFactory[gfxCore] = DeviceQueueHw<Family>::create;
}

// Returns the size in bytes of the workaround commands: one MI_ATOMIC followed by
// two MI_LOAD_REGISTER_IMM commands.
template <>
size_t DeviceQueueHw<Family>::getWaCommandsSize() {
    return sizeof(Family::MI_ATOMIC) +
           sizeof(Family::MI_LOAD_REGISTER_IMM) +
           sizeof(Family::MI_LOAD_REGISTER_IMM);
}

// No MI_ARB_CHECK workaround command is emitted for this family.
template <>
void DeviceQueueHw<Family>::addArbCheckCmdWa() {}

// Programs an 8-byte-increment MI_ATOMIC (QWORD data size) on atomicOpPlaceholder
// into the second level buffer command stream.
template <>
void DeviceQueueHw<Family>::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) {
    EncodeAtomic<Family>::programMiAtomic(slbCS,
                                          atomicOpPlaceholder,
                                          Family::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT,
                                          Family::MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD,
                                          0x1u, 0x1u, 0x0u, 0x0u);
}

// Programs an MI_LOAD_REGISTER_IMM that writes the CTXT_PREMP_DBG register.
// setArbCheck == true sets only bit 8 (Preempt On MI_ARB_CHK Only); false writes 0.
template <>
void DeviceQueueHw<Family>::addLriCmdWa(bool setArbCheck) {
    // CTXT_PREMP_DBG offset
    constexpr uint32_t registerAddress = 0x2248u;
    // bit 8 (Preempt On MI_ARB_CHK Only)
    constexpr uint32_t preemptOnArbCheckOnly = 0x00000100u;

    const uint32_t value = setArbCheck ? preemptOnArbCheckOnly : 0u;

    LriHelper<Family>::program(&slbCS,
                               registerAddress,
                               value,
                               false);
}

// No PIPE_CONTROL workaround command is emitted for this family.
template <>
void DeviceQueueHw<Family>::addPipeControlCmdWa(bool isNoopCmd) {}

// Appends a PIPE_CONTROL with CS stall enabled whose post-sync operation writes a
// timestamp to timestampAddress.
template <>
void DeviceQueueHw<Family>::addProfilingEndCmds(uint64_t timestampAddress) {
    auto pipeControlSpace = static_cast<PIPE_CONTROL *>(slbCS.getSpace(sizeof(PIPE_CONTROL)));

    // Build the command in a local copy first, then store it to the stream in one assignment.
    auto pipeControlCmd = Family::cmdInitPipeControl;
    pipeControlCmd.setCommandStreamerStallEnable(true);
    pipeControlCmd.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
    // The 64-bit address is split into its upper and lower 32-bit halves.
    pipeControlCmd.setAddressHigh(static_cast<uint32_t>(timestampAddress >> 32));
    pipeControlCmd.setAddress(static_cast<uint32_t>(timestampAddress & 0xffffffffu));

    *pipeControlSpace = pipeControlCmd;
}

// Enables DC flush on the given PIPE_CONTROL.
template <>
void DeviceQueueHw<Family>::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {
    pc->setDcFlushEnable(true);
}

template class DeviceQueueHw<Family>;
} // namespace NEO

View File

@ -12,7 +12,6 @@
#include "shared/source/utilities/perf_counter.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
@ -31,7 +30,6 @@ struct EnableOCLGen8 {
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();

View File

@ -8,6 +8,7 @@
#include "shared/source/gen8/hw_info.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@ -1,56 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "scheduler_definitions.h"
// Computes the SLM-size patch value for slMsize bytes of shared local memory:
// 0 when no SLM is used, otherwise GetNextPowerof2() applied to the number of
// 4KB units needed to hold slMsize (rounded up).
uint GetPatchValueForSLMSize(uint slMsize) {
    if (slMsize == 0) {
        return 0;
    }

    // Round up via quotient plus remainder check, so no addition can overflow.
    uint units4KB = slMsize / 4096 + ((slMsize % 4096 != 0) ? 1 : 0);
    return GetNextPowerof2(units4KB);
}
// On BDW there is only one pipe control to handle.
// Overwrites with zeros the six consecutive DWORDs of secondaryBatchBuffer that
// start at index dwordOffset + pipeControlOffset.
void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) {
    uint firstDword = dwordOffset + pipeControlOffset;

    for (uint i = 0; i < 6; i++) {
        secondaryBatchBuffer[firstDword + i] = 0;
    }
}
// Writes a six-DWORD command into secondaryBatchBuffer starting at index
// dwordOffset + pipeControlOffset: PIPE_CONTROL_CSTALL_DWORD0, PIPE_CONTROL_CSTALL_DWORD1,
// then four zeroed DWORDs.
void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) {
    uint firstDword = dwordOffset + pipeControlOffset;

    secondaryBatchBuffer[firstDword] = PIPE_CONTROL_CSTALL_DWORD0;
    secondaryBatchBuffer[firstDword + 1] = PIPE_CONTROL_CSTALL_DWORD1;

    // Remaining DWORDs of the command are cleared.
    for (uint i = 2; i < 6; i++) {
        secondaryBatchBuffer[firstDword + i] = 0;
    }
}

View File

@ -1,143 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Size of a single interface descriptor entry (presumably in bytes - TODO confirm).
#define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8 32
// Generation dependent number
// Number of Interface Descriptors is 64 for BDW
#define NUMBER_OF_INERFACE_DESCRIPTORS 64
// Two less than the number of interface descriptors.
#define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2)
// Total size of the interface descriptor table: entry count * entry size.
#define INTERFACE_DESCRIPTOR_TABLE_SIZE_G8 (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8)
// Based on the alignment ( 64 vs 32 ) requirements this may be Gen dependent
#define SIZEOF_COLOR_CALCULATOR_STATE_G8 0xC0
#define OCLRT_SIZEOF_SAMPLER_STATE_G8 (16)
// Generation-neutral aliases that resolve to the Gen8 values defined above.
#define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G8
#define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8
#define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G8
#define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G8
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE (SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8_AND_LATER)
#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN8_AND_LATER)
// DWORD OFFSET
// The offsets below are chained: each command's offset is the previous command's
// offset advanced by the previous command's size in DWORDs.
#define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0
// bits 0-5 of 1st DWORD
#define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1)
#define MI_ATOMIC_CMD_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD)
// MEDIA_VFE_STATE offsets are disabled; MEDIA_INTERFACE_DESCRIPTOR_LOAD follows MI_ATOMIC directly.
//#define OCLRT_MEDIA_VFE_STATE_OFFSET (MI_ATOMIC_CMD_OFFSET + OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET)
// address is QWORD in size and starts on DWORD 1
//#define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1)
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MI_ATOMIC_CMD_OFFSET + OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET) //(OCLRT_MEDIA_VFE_STATE_OFFSET + OCLRT_SIZEOF_MEDIA_VFE_STATE_DWORD)
// DWORD OFFSET of Interface Descriptor Start Address
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3)
// DWORD indices (and one bit position) inside an interface descriptor entry.
// Note that HWTHREADS_NUMBER and SLMSIZE both live in DWORD 6.
#define INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3
#define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4
#define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5
#define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6
#define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6
#define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9
// Masks and DWORD index used when patching sampler state.
#define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0)
#define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0)
#define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2
// Disabling preemption applies to Gen8: with the LRI workaround, an MI_LOAD_REGISTER_IMM
// slot sits between MEDIA_INTERFACE_DESCRIPTOR_LOAD and the start-timestamp PIPE_CONTROL.
#if defined WA_LRI_COMMANDS_EXIST
#define IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET)
#define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET + OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET)
#else
#define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET)
#endif // WA_LRI_COMMANDS_EXIST
// DWORD OFFSET of the GPGPU_WALKER command: the start-timestamp PIPE_CONTROL offset
// advanced by the Gen8 PIPE_CONTROL size in DWORDs.
#define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET)
// DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER
// bits 0 - 5
#define GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1)
// DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER
// bits 0 - 16
#define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2)
// DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER
#define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3)
// The next four fields share the same DWORD (GPGPU_WALKER_OFFSET + 4) and differ only
// in the bit range noted for each of them.
// DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER
// bits 0 - 5
#define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER
// bits 8 - 13
#define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER
// bits 16 - 21
#define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the SIMD Size for GPGPU_WALKER
// bits 30 - 31
#define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4)
// DWORD OFFSET of the Starting in X pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5)
// DWORD OFFSET of the X Dimension for GPGPU_WALKER
#define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7)
// DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8)
// DWORD OFFSET of the Y Dimension for GPGPU_WALKER
#define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10)
// DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER
// bits 0 - 31
#define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11)
// DWORD OFFSET of the Z Dimension for GPGPU_WALKER
#define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 12)
// DWORD OFFSET of the Right or X Mask for GPGPU_WALKER
#define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13)
// DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER
#define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14)
// DWORD OFFSET of the MEDIA_STATE_FLUSH command: the GPGPU_WALKER offset advanced by the
// Gen8 walker size in DWORDs.
#define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8_DWORD_OFFSET)
// bits 0-5 of 1st DWORD of M_S_F command
#define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1)
// DWORD OFFSET of the end-timestamp PIPE_CONTROL, placed after the MEDIA_STATE_FLUSH.
#define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD)
// Alias of the end-timestamp offset (no additional adjustment is applied here).
#define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET)
// Location and encodings of the post-sync operation field inside a PIPE_CONTROL.
#define PIPE_CONTROL_POST_SYNC_DWORD 1
#define PIPE_CONTROL_POST_SYNC_START_BIT 14
#define PIPE_CONTROL_POST_SYNC_END_BIT 15
#define PIPE_CONTROL_GENERATE_TIME_STAMP 3
#define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0
// Location of the address field inside a PIPE_CONTROL and the bit ranges of its low/high parts.
#define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2
#define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2
#define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2
#define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31
#define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0
#define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15
// Pre-encoded first two DWORDs for the PIPE_CONTROL variants named below.
// All three share DWORD0; only DWORD1 differs between them.
#define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004
#define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4
#define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004
#define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4
#define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004
#define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4
// the value of InitMIBBStartCmd_G8 DWORD0
#define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101)
// Only defined when the LRI workaround commands are present: the enable-preemption
// MI_LOAD_REGISTER_IMM sits one Gen8 PIPE_CONTROL-sized slot past the end-timestamp offset.
#if defined WA_LRI_COMMANDS_EXIST
#define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET)
#endif
// Pre-encoded MI_LOAD_REGISTER_IMM header, the register/value pairs used for the
// preemption control register, and the DWORD positions of address and value in the command.
#define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001
#define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248
#define CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100
#define CTXT_PREMP_DEFAULT_VALUE 0x0
#define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1
#define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2

View File

@ -10,6 +10,7 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
#include "enqueue_init_dispatch_globals.h"

View File

@ -10,8 +10,8 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw_bdw_and_later.inl"
#include "opencl/source/device_queue/device_queue_hw_profiling.inl"
#include "opencl/source/device_queue/device_queue_hw_skl_and_later.inl"
namespace NEO {
typedef SKLFamily Family;

View File

@ -8,6 +8,7 @@
#include "shared/source/gen9/hw_cmds_base.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@ -9,6 +9,7 @@
#include "opencl/source/context/context.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "test.h"
#include "cl_api_tests.h"
@ -50,14 +51,14 @@ struct clSetDefaultDeviceCommandQueueApiTest : public api_tests {
cl_command_queue deviceQueue = nullptr;
};
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenSettingDefaultDeviceQueueThenSuccessIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenSettingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) {
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, deviceQueue);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(static_cast<_device_queue *>(deviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue()));
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenReplacingDefaultDeviceQueueThenSuccessIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenReplacingDefaultDeviceQueueThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES,
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE,
0,
@ -78,12 +79,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenValidPar
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullContextWhenSettingDefaultDeviceQueueThenClInvalidContextErrorIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullContextWhenSettingDefaultDeviceQueueThenClInvalidContextErrorIsReturned, DeviceEnqueueSupport) {
retVal = clSetDefaultDeviceCommandQueue(nullptr, testedClDevice, deviceQueue);
ASSERT_EQ(CL_INVALID_CONTEXT, retVal);
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceWhenSettingDefaultDeviceQueueThenClInvalidDeviceErrorIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceWhenSettingDefaultDeviceQueueThenClInvalidDeviceErrorIsReturned, DeviceEnqueueSupport) {
retVal = clSetDefaultDeviceCommandQueue(pContext, nullptr, deviceQueue);
ASSERT_EQ(CL_INVALID_DEVICE, retVal);
}
@ -96,12 +97,12 @@ TEST_F(clSetDefaultDeviceCommandQueueApiTest, GivenDeviceNotSupportingDeviceEnqu
ASSERT_EQ(CL_INVALID_OPERATION, retVal);
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
retVal = clSetDefaultDeviceCommandQueue(pContext, testedClDevice, nullptr);
ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenHostQueueAsDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenHostQueueAsDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, 0, 0, 0};
cl_command_queue hostQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal);
ASSERT_NE(nullptr, hostQueue);
@ -114,7 +115,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenHostQueu
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenIncorrectDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) {
HWTEST2_F(clSetDefaultDeviceCommandQueueApiTest, GivenIncorrectDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned, DeviceEnqueueSupport) {
auto context2 = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES,

View File

@ -1,10 +0,0 @@
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Adds this directory's CMakeLists.txt and the Gen12LP parent-kernel AUB test source
# to the igdrcl_aub_tests target as private sources.
target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen12lp.cpp
)

View File

@ -1,117 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
#include <memory>
using namespace NEO;
using GEN12LPAUBParentKernelFixture = AUBParentKernelFixture;

// AUB test: enqueues a parent kernel with sampler, image, buffer and scalar arguments,
// then expects m_controls.m_TotalNumberOfQueues in the device queue buffer to equal 1
// and the start of the buffer allocation to match the scalar argument.
GEN12LPTEST_F(GEN12LPAUBParentKernelFixture, WhenEnqueuingParentKernelThenExpectationsMet) {
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);

    ASSERT_NE(nullptr, pKernel);
    ASSERT_TRUE(pKernel->isParentKernel);

    const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE),
                                               0, 0};

    // Every object created below is owned by a unique_ptr so it is released on all exit paths.
    std::unique_ptr<DeviceQueue> devQueue(DeviceQueue::create(
        &pCmdQ->getContext(),
        pClDevice,
        properties[0],
        retVal));
    // devQueue is dereferenced when computing the expected GPU address below.
    ASSERT_NE(nullptr, devQueue.get());

    SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
    // Aub execution takes huge time for bigger GWS
    scheduler.setGws(24);

    size_t offset[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    size_t lws[3] = {1, 1, 1};

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc desc = {0};
    desc.image_array_size = 0;
    desc.image_depth = 1;
    desc.image_height = 4;
    desc.image_width = 4;
    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
    desc.image_row_pitch = 0;
    desc.image_slice_pitch = 0;

    auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr<Image> image(Image::create(
        pContext,
        {},
        0,
        0,
        surfaceFormat,
        &desc,
        nullptr,
        retVal));

    std::unique_ptr<Buffer> buffer(BufferHelper<BufferUseHostPtr<>>::create(pContext));

    std::unique_ptr<Sampler> sampler(Sampler::create(
        pContext,
        CL_TRUE,
        CL_ADDRESS_NONE,
        CL_FILTER_LINEAR,
        retVal));

    // Kernel arguments are passed as API handles; setArg receives a pointer to the handle.
    cl_mem bufferMem = buffer.get();
    cl_mem imageMem = image.get();
    // Convert to a cl_sampler handle, as done for the cl_mem handles above, instead of
    // passing the address of the C++ object pointer.
    cl_sampler samplerHandle = sampler.get();

    uint64_t argScalar = 2;
    pKernel->setArg(
        3,
        sizeof(uint64_t),
        &argScalar);

    pKernel->setArg(
        2,
        sizeof(cl_mem),
        &bufferMem);

    pKernel->setArg(
        1,
        sizeof(cl_mem),
        &imageMem);

    pKernel->setArg(
        0,
        sizeof(cl_sampler),
        &samplerHandle);

    pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0);

    pCmdQ->finish();

    uint32_t expectedNumberOfEnqueues = 1;
    uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues);

    AUBCommandStreamFixture::expectMemory<FamilyType>(reinterpret_cast<void *>(gpuAddress), &expectedNumberOfEnqueues, sizeof(uint32_t));
    AUBCommandStreamFixture::expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), &argScalar, sizeof(size_t));
}

View File

@ -1,12 +0,0 @@
#
# Copyright (C) 2018-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Only when Gen8 tests are enabled: register this CMakeLists.txt with the
# igdrcl_aub_tests target and call add_subdirectories().
if(TESTS_GEN8)
target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)
add_subdirectories()
endif()

View File

@ -1,10 +0,0 @@
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Adds this directory's CMakeLists.txt and the Gen8 parent-kernel AUB test source
# to the igdrcl_aub_tests target as private sources.
target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen8.cpp
)

View File

@ -1,113 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
#include <memory>
using namespace NEO;
using GEN8AUBParentKernelFixture = AUBParentKernelFixture;

// AUB test: enqueues a parent kernel with sampler, image, buffer and scalar arguments,
// then expects m_controls.m_TotalNumberOfQueues in the device queue buffer to equal 1
// and the start of the buffer allocation to match the scalar argument.
GEN8TEST_F(GEN8AUBParentKernelFixture, WhenEnqueuingParentKernelThenExpectationsMet) {
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);

    ASSERT_NE(nullptr, pKernel);
    ASSERT_TRUE(pKernel->isParentKernel);

    const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE),
                                               0, 0};

    std::unique_ptr<DeviceQueue> devQueue(DeviceQueue::create(
        &pCmdQ->getContext(),
        pClDevice,
        properties[0],
        retVal));
    // devQueue is dereferenced when computing the expected GPU address below.
    ASSERT_NE(nullptr, devQueue.get());

    SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
    // Aub execution takes huge time for bigger GWS
    scheduler.setGws(24);

    size_t offset[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    size_t lws[3] = {1, 1, 1};

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc desc = {0};
    desc.image_array_size = 0;
    desc.image_depth = 1;
    desc.image_height = 4;
    desc.image_width = 4;
    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
    desc.image_row_pitch = 0;
    desc.image_slice_pitch = 0;

    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &pClDevice->getDevice());
    auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr<Image> image(Image::create(
        pContext,
        memoryProperties,
        0,
        0,
        surfaceFormat,
        &desc,
        nullptr,
        retVal));

    std::unique_ptr<Buffer> buffer(BufferHelper<BufferUseHostPtr<>>::create(pContext));

    std::unique_ptr<Sampler> sampler(Sampler::create(
        pContext,
        CL_TRUE,
        CL_ADDRESS_NONE,
        CL_FILTER_LINEAR,
        retVal));

    // Kernel arguments are passed as API handles; setArg receives a pointer to the handle.
    cl_mem bufferMem = buffer.get();
    cl_mem imageMem = image.get();
    // Convert to a cl_sampler handle, as done for the cl_mem handles above, instead of
    // passing the address of the owning smart pointer.
    cl_sampler samplerHandle = sampler.get();

    uint64_t argScalar = 2;
    pKernel->setArg(
        3,
        sizeof(uint64_t),
        &argScalar);

    pKernel->setArg(
        2,
        sizeof(cl_mem),
        &bufferMem);

    pKernel->setArg(
        1,
        sizeof(cl_mem),
        &imageMem);

    pKernel->setArg(
        0,
        sizeof(cl_sampler),
        &samplerHandle);

    pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0);

    pCmdQ->finish();

    uint32_t expectedNumberOfEnqueues = 1;
    uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues);

    AUBCommandStreamFixture::expectMemory<FamilyType>(reinterpret_cast<void *>(gpuAddress), &expectedNumberOfEnqueues, sizeof(uint32_t));
    AUBCommandStreamFixture::expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), &argScalar, sizeof(size_t));
}

View File

@ -15,6 +15,7 @@
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
#include "opencl/test/unit_test/helpers/cl_hw_parse.h"
@ -992,7 +993,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabled
EXPECT_TRUE(pipeControl->getDcFlushEnable());
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenMakeEnqueueBlocking) {
HWTEST2_F(EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenMakeEnqueueBlocking, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
MyCmdQ<FamilyType> cmdQ(context, pClDevice);

View File

@ -13,6 +13,7 @@
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
@ -25,7 +26,7 @@
using namespace NEO;
using namespace DeviceHostQueue;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenQueueMatchesUnderlyingBuffer) {
HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenQueueMatchesUnderlyingBuffer, DeviceEnqueueSupport) {
// profiling disabled
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
@ -54,7 +55,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenQueue
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenFirstStackElementAtValueOne) {
HWTEST2_F(DeviceQueueHwTest, WhenResettingDeviceQueueThenFirstStackElementAtValueOneDeviceEnqueueSupport, DeviceEnqueueSupport) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
auto deviceQueueHw = castToHwType<FamilyType>(deviceQueue);
@ -67,7 +68,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenFirst
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcquiringEmCrticalSectionThenSectionIsNotAcquired) {
HWTEST2_F(DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcquiringEmCrticalSectionThenSectionIsNotAcquired, DeviceEnqueueSupport) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableNullHardware.set(1);
@ -82,7 +83,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcq
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeIsGreaterThanZero) {
HWTEST2_F(DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeIsGreaterThanZero, DeviceEnqueueSupport) {
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
deviceQueueProperties::minimumProperties[0]);
@ -90,7 +91,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeI
delete mockDeviceQueueHw;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGettingSlbCsThenParamsAreCorrect) {
HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
deviceQueueProperties::minimumProperties[0]);
@ -114,7 +115,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGet
delete mockDeviceQueueHw;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhenGettingSlbCsThenParamsAreCorrect) {
HWTEST2_F(DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhenGettingSlbCsThenParamsAreCorrect, DeviceEnqueueSupport) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
deviceQueueProperties::minimumProperties[0]);
@ -138,7 +139,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhen
delete mockDeviceQueueHw;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenDeviceQueueHWWhenEventPoolIsCreatedThenTimestampResolutionIsSet) {
HWTEST2_F(DeviceQueueHwTest, GivenDeviceQueueHWWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) {
auto timestampResolution = static_cast<float>(device->getProfilingTimerResolution());
auto deviceQueue = std::unique_ptr<DeviceQueue>(createQueueObject());
@ -162,7 +163,7 @@ class DeviceQueueSlb : public DeviceQueueHwTest {
}
};
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSizeIsAllocated) {
HWTEST2_F(DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSizeIsAllocated, DeviceEnqueueSupport) {
std::unique_ptr<MockDeviceQueueHw<FamilyType>> mockDeviceQueueHw(new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]));
LinearStream *slbCS = mockDeviceQueueHw->getSlbCS();
@ -176,7 +177,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSi
EXPECT_LE(expectedSize, slbCS->getAvailableSpace());
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAreCorrect) {
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAreCorrect, DeviceEnqueueSupport) {
auto mockDeviceQueueHw =
new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
auto mockDeviceQueueHwWithProfiling =
@ -260,7 +261,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAre
delete mockDeviceQueueHwWithProfiling;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect) {
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect, DeviceEnqueueSupport) {
auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device,
deviceQueueProperties::minimumProperties[0]);
@ -299,7 +300,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect)
free(slbCopy);
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsCorrect) {
HWTEST2_F(DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsCorrect, DeviceEnqueueSupport) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@ -366,7 +367,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsC
delete mockDeviceQueueHw;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmCleanupSectionIsAdded) {
HWTEST2_F(DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmCleanupSectionIsAdded, DeviceEnqueueSupport) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@ -437,7 +438,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmC
delete mockDeviceQueueHw;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBufferParamsAreCorrect) {
HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBufferParamsAreCorrect, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
deviceQueue = createQueueObject();
@ -464,7 +465,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBuf
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOffsetIsCorrect) {
HWTEST2_F(DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOffsetIsCorrect, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
deviceQueue = createQueueObject();
@ -526,7 +527,7 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture,
MockContext *context = nullptr;
};
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed) {
HWTEST2_P(DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed, DeviceEnqueueSupport) {
EXPECT_TRUE(pKernel->isParentKernel);
pKernel->createReflectionSurface();
@ -555,7 +556,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectState
delete ssh;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet) {
HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet, DeviceEnqueueSupport) {
EXPECT_TRUE(pKernel->isParentKernel);
pKernel->createReflectionSurface();
@ -580,7 +581,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
delete ssh;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly) {
HWTEST2_P(DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly, DeviceEnqueueSupport) {
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
EXPECT_TRUE(pKernel->isParentKernel);
@ -612,7 +613,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
delete devQueueHw;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled) {
HWTEST2_P(DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled, DeviceEnqueueSupport) {
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
@ -662,7 +663,7 @@ struct TheSimplestDeviceQueueFixture : testing::Test {
}
};
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenResettingDeviceQueueThenEarlyReturnValuesAreSet) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenResettingDeviceQueueThenEarlyReturnValuesAreSet, DeviceEnqueueSupport) {
DebugManagerStateRestore dbgRestorer;
@ -678,7 +679,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenResettingDeviceQu
EXPECT_EQ(0u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturnCounter);
}
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddihMediaStateClearCmdsThenCmdsAreAddedCorrectly) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddihMediaStateClearCmdsThenCmdsAreAddedCorrectly, DeviceEnqueueSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
@ -710,7 +711,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddihMediaStateCl
EXPECT_NE(hwParser.cmdList.end(), mediaVfeStateItor);
}
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddingExecutionModelCleanupSectionThenMediaStateIsCleared) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenAddingExecutionModelCleanupSectionThenMediaStateIsCleared, DeviceEnqueueSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
@ -739,7 +740,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddingExecutionMo
EXPECT_TRUE(mockDeviceQueueHw->addMediaStateClearCmdsCalled);
}
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingMediaStateClearThenCmdsSizeIsCorrect) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingMediaStateClearThenCmdsSizeIsCorrect, DeviceEnqueueSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
@ -751,7 +752,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingMediaState
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getMediaStateClearCmdsSize());
}
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingExecutionModelCleanupThenSectionSizeIsCorrect) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingExecutionModelCleanupThenSectionSizeIsCorrect, DeviceEnqueueSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
@ -779,7 +780,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingExecutionM
EXPECT_EQ(expectedSize, MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize());
}
HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingProfilingEndThenCmdsSizeIsCorrect) {
HWTEST2_F(TheSimplestDeviceQueueFixture, WhenSettingProfilingEndThenCmdsSizeIsCorrect, DeviceEnqueueSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

View File

@ -10,6 +10,7 @@
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
@ -19,7 +20,7 @@ using namespace DeviceHostQueue;
using DeviceQueueSimpleTest = ::testing::Test;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, WhenExecutionModelDispatchIsSetupThenNoAdditionalActionsOccur) {
HWTEST2_F(DeviceQueueSimpleTest, WhenExecutionModelDispatchIsSetupThenNoAdditionalActionsOccur, DeviceEnqueueSupport) {
DeviceQueue devQueue;
char buffer[20];
@ -37,7 +38,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, WhenExecutionModelDispatchIsS
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, WhenResettingDeviceQueueThenIndirectHeapIsNotUsed) {
HWTEST2_F(DeviceQueueSimpleTest, WhenResettingDeviceQueueThenIndirectHeapIsNotUsed, DeviceEnqueueSupport) {
DeviceQueue devQueue;
devQueue.resetDeviceQueue();
EXPECT_EQ(nullptr, devQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
@ -69,7 +70,7 @@ class DeviceQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
ClDevice *device;
};
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, GivenDeviceQueueCapWhenCreatingAdditionalDeviceQueuesThenQueueIsNotCreated) {
HWTEST2_F(DeviceQueueTest, GivenDeviceQueueCapWhenCreatingAdditionalDeviceQueuesThenQueueIsNotCreated, DeviceEnqueueSupport) {
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = 1;
@ -85,7 +86,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, GivenDeviceQueueCapWhenCreatingAddi
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCreatedThenTimestampResolutionIsSet) {
HWTEST2_F(DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCreatedThenTimestampResolutionIsSet, DeviceEnqueueSupport) {
auto timestampResolution = static_cast<float>(device->getProfilingTimerResolution());
auto deviceQueue = std::unique_ptr<DeviceQueue>(createQueueObject());
@ -98,7 +99,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCrea
typedef DeviceQueueTest DeviceQueueBuffer;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenNoPropertyWhenCreatingQueueThenPreferredSizeIsSet) {
HWTEST2_F(DeviceQueueBuffer, GivenNoPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) {
auto &deviceInfo = device->getDeviceInfo();
deviceQueue = createQueueObject(); // only minimal properties
ASSERT_NE(deviceQueue, nullptr);
@ -106,7 +107,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenNoPropertyWhenCreatingQueueT
deviceQueue->release();
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenInvalidPropertyWhenCreatingQueueThenPreferredSizeIsSet) {
HWTEST2_F(DeviceQueueBuffer, GivenInvalidPropertyWhenCreatingQueueThenPreferredSizeIsSet, DeviceEnqueueSupport) {
cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1],
CL_QUEUE_SIZE, 0, 0};
auto &deviceInfo = device->getDeviceInfo();
@ -123,7 +124,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenInvalidPropertyWhenCreatingQ
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenValidSizeWhenCreatingQueueThenProvidedSizeIsSet) {
HWTEST2_F(DeviceQueueBuffer, GivenValidSizeWhenCreatingQueueThenProvidedSizeIsSet, DeviceEnqueueSupport) {
auto &deviceInfo = device->getDeviceInfo();
cl_uint validSize = deviceInfo.queueOnDevicePreferredSize - 1;
cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1],
@ -138,7 +139,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, GivenValidSizeWhenCreatingQueueTh
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, WhenDeviceQueueIsCreatedThenItIsCorrectlyInitialized) {
HWTEST2_F(DeviceQueueBuffer, WhenDeviceQueueIsCreatedThenItIsCorrectlyInitialized, DeviceEnqueueSupport) {
auto &deviceInfo = device->getDeviceInfo();
deviceQueue = createQueueObject();
@ -164,7 +165,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, WhenDeviceQueueIsCreatedThenItIsC
typedef DeviceQueueTest DeviceQueueStackBuffer;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenAllocatedResourcesAreZeroed) {
HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenAllocatedResourcesAreZeroed, DeviceEnqueueSupport) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
@ -173,7 +174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThen
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenStackBufferIsAllocated) {
HWTEST2_F(DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThenStackBufferIsAllocated, DeviceEnqueueSupport) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
@ -189,7 +190,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, WhenDeviceQueueIsCreatedThen
typedef DeviceQueueTest DeviceQueueStorageBuffer;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStorageBuffer, WhenDeviceQueueIsCreatedThenStorageBufferIsAllocated) {
HWTEST2_F(DeviceQueueStorageBuffer, WhenDeviceQueueIsCreatedThenStorageBufferIsAllocated, DeviceEnqueueSupport) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
@ -203,7 +204,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStorageBuffer, WhenDeviceQueueIsCreatedTh
typedef DeviceQueueTest DefaultDeviceQueue;
HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, GivenSingleDeviceQueueIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue) {
HWTEST2_F(DefaultDeviceQueue, GivenSingleDeviceQueueIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) {
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0};
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
@ -229,7 +230,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, GivenSingleDeviceQueueIsSupporte
const_cast<ClDeviceInfo *>(&device->getDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, GivenMultipleDeviceQueuesIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue) {
HWTEST2_F(DefaultDeviceQueue, GivenMultipleDeviceQueuesIsSupportedWhenSecondDeviceQueueIsCreatedThenReuseDeviceQueue, DeviceEnqueueSupport) {
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0};
auto maxOnDeviceQueues = device->getDeviceInfo().maxOnDeviceQueues;
@ -257,7 +258,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, GivenMultipleDeviceQueuesIsSuppo
typedef DeviceQueueTest DeviceQueueEventPool;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueEventPool, WhenDeviceQueueIsCreatedThenEventPoolBufferIsAllocated) {
HWTEST2_F(DeviceQueueEventPool, WhenDeviceQueueIsCreatedThenEventPoolBufferIsAllocated, DeviceEnqueueSupport) {
auto &deviceInfo = device->getDeviceInfo();
// number of events + event pool representation
@ -274,7 +275,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueEventPool, WhenDeviceQueueIsCreatedThenEv
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBufferIsAllocated) {
HWTEST2_F(DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBufferIsAllocated, DeviceEnqueueSupport) {
deviceQueue = createQueueObject();
ASSERT_NE(deviceQueue, nullptr);
@ -285,7 +286,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBuff
delete deviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDispatchingSchedulerThenNoAssertsOccur) {
HWTEST2_F(DeviceQueueTest, WhenDispatchingSchedulerThenNoAssertsOccur, DeviceEnqueueSupport) {
DeviceQueue devQueue;
MockProgram program(toClDeviceVector(*device));
MockCommandQueue cmdQ(nullptr, nullptr, 0, false);

View File

@ -7,6 +7,7 @@
#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
using namespace NEO;
using namespace DeviceHostQueue;
@ -31,7 +32,7 @@ class GetDeviceQueueInfoTest : public DeviceHostQueueFixture<DeviceQueue> {
DeviceQueue *deviceQueue = nullptr;
};
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueContextWhenGettingDeviceQueueInfoThenSuccessIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueContextWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_context contextReturned = nullptr;
retVal = deviceQueue->getCommandQueueInfo(
@ -43,7 +44,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueContextWhenGetting
EXPECT_EQ((cl_context)pContext, contextReturned);
}
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueDeviceWhenGettingDeviceQueueInfoThenSuccessIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_device_id deviceExpected = testedClDevice;
cl_device_id deviceIdReturned = nullptr;
@ -56,7 +57,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueDeviceWhenGettingD
EXPECT_EQ(deviceExpected, deviceIdReturned);
}
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueuePropertiesWhenGettingDeviceQueueInfoThenSuccessIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueuePropertiesWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_command_queue_properties propertiesReturned = 0;
retVal = deviceQueue->getCommandQueueInfo(
@ -68,7 +69,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueuePropertiesWhenGett
EXPECT_EQ(deviceQueueProperties::allProperties[1], propertiesReturned);
}
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueSizeWhenGettingDeviceQueueInfoThenSuccessIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueSizeWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_uint queueSizeReturned = 0;
retVal = deviceQueue->getCommandQueueInfo(
@ -81,7 +82,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueSizeWhenGettingDev
}
// OCL 2.1
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueDeviceDefaultWhenGettingDeviceQueueInfoThenSuccessIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenQueueDeviceDefaultWhenGettingDeviceQueueInfoThenSuccessIsReturned, DeviceEnqueueSupport) {
cl_command_queue commandQueueReturned = nullptr;
retVal = deviceQueue->getCommandQueueInfo(
@ -95,11 +96,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenQueueDeviceDefaultWhenG
EXPECT_EQ(deviceQueue, commandQueueReturned);
}
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, WhenGettingDeviceQueueInfoThenProfilingIsEnabled) {
HWTEST2_F(GetDeviceQueueInfoTest, WhenGettingDeviceQueueInfoThenProfilingIsEnabled, DeviceEnqueueSupport) {
EXPECT_TRUE(deviceQueue->isProfilingEnabled());
}
HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, GivenInvalidParamWhenGettingDeviceQueueInfoThenInvalidValueErrorIsReturned) {
HWTEST2_F(GetDeviceQueueInfoTest, GivenInvalidParamWhenGettingDeviceQueueInfoThenInvalidValueErrorIsReturned, DeviceEnqueueSupport) {
uint32_t tempValue = 0;
retVal = deviceQueue->getCommandQueueInfo(

View File

@ -21,6 +21,7 @@
#include "opencl/source/event/user_event.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/helpers/gtest_helpers.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
@ -35,7 +36,7 @@ static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
typedef ExecutionModelKernelTest ParentKernelEnqueueTest;
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables) {
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
@ -122,7 +123,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident) {
HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident, DeviceEnqueueSupport) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
int32_t executionStamp = 0;
@ -151,7 +152,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivate
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) {
HWTEST2_P(ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
@ -186,7 +187,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemor
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) {
HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident, DeviceEnqueueSupport) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
@ -202,7 +203,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocks
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) {
HWTEST2_P(ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident, DeviceEnqueueSupport) {
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
@ -215,7 +216,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFill
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) {
HWTEST2_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks, DeviceEnqueueSupport) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
@ -240,7 +241,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocks
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) {
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
@ -269,7 +270,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated) {
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated, DeviceEnqueueSupport) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
@ -285,7 +286,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsed
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) {
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied, DeviceEnqueueSupport) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
@ -345,7 +346,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
}
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenReflectionSurfaceIsCreated) {
HWTEST2_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenReflectionSurfaceIsCreated, DeviceEnqueueSupport) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
@ -359,7 +360,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset) {
HWTEST2_P(ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset, DeviceEnqueueSupport) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
cl_queue_properties properties[3] = {0};
@ -380,7 +381,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenBlockedQueueWhenParent
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident) {
HWTEST2_P(ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident, DeviceEnqueueSupport) {
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
@ -442,7 +443,7 @@ class ParentKernelEnqueueFixture : public ExecutionModelSchedulerTest,
}
};
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenDefaultDeviceQueueAndEventPoolIsPatched) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenDefaultDeviceQueueAndEventPoolIsPatched, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
@ -466,7 +467,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenBlocksDSHOnReflectionSurfaceArePatchedWithDeviceQueueAndEventPoolAddresses) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenBlocksDSHOnReflectionSurfaceArePatchedWithDeviceQueueAndEventPoolAddresses, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
@ -501,7 +502,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenDeviceQueueCriticalSetionIsAcquired) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenDeviceQueueCriticalSetionIsAcquired, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
@ -516,7 +517,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToBlockedQueueThenDeviceQueueCriticalSetionIsNotAcquired) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToBlockedQueueThenDeviceQueueCriticalSetionIsNotAcquired, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
@ -535,7 +536,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenFlushCsrWithSlm) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenFlushCsrWithSlm, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
@ -550,7 +551,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedWithSchedulerReturnInstanceThenRunSimulation) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedWithSchedulerReturnInstanceThenRunSimulation, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
@ -580,7 +581,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKernelIsSubmittedThenItIsFlushed) {
HWTEST2_F(ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKernelIsSubmittedThenItIsFlushed, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
@ -603,7 +604,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, givenCsrInBatchingModeWh
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenMarkCsrMediaVfeStateDirty) {
HWTEST2_F(ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenMarkCsrMediaVfeStateDirty, DeviceEnqueueSupport) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};

View File

@ -12,6 +12,7 @@
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
@ -34,7 +35,7 @@ class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest,
}
};
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenProgrammingIsCorrect) {
HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenProgrammingIsCorrect, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
@ -157,7 +158,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472());
}
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenStandardCmdqIohIsNotUsed) {
HWTEST2_F(ExecutionModelSchedulerFixture, WhenDispatchingSchedulerThenStandardCmdqIohIsNotUsed, DeviceEnqueueSupport) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
@ -191,7 +192,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
EXPECT_EQ(0u, ioh.getUsed());
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSetToFirstInstanceWhenDispatchingSchedulerThenBbStartCmdIsNotInserted) {
HWTEST2_F(ParentKernelCommandQueueFixture, GivenEarlyReturnSetToFirstInstanceWhenDispatchingSchedulerThenBbStartCmdIsNotInserted, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -229,7 +230,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSet
EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
}
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenForceDispatchingSchedulerThenSchedulerKernelIsEnqueued) {
HWTEST2_F(ExecutionModelSchedulerFixture, WhenForceDispatchingSchedulerThenSchedulerKernelIsEnqueued, DeviceEnqueueSupport) {
DebugManagerStateRestore dbgRestorer;

View File

@ -12,6 +12,7 @@
#include "opencl/source/command_queue/hardware_interface.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/task_information.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
@ -79,7 +80,7 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
HwTimeStamps *timestampAddedInCleanupSection = nullptr;
};
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -120,7 +121,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -184,7 +185,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -225,7 +226,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -266,7 +267,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -305,7 +306,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapsWhenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapsWhenParentKernelIsSubmittedThenQueueHeapsAreNotUsed, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -362,7 +363,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};
@ -408,7 +409,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen
delete cmdComputeKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
HWTEST2_F(ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
cl_queue_properties properties[3] = {0};

View File

@ -19,6 +19,7 @@ set(IGDRCL_SRCS_tests_fixtures
${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_matcher.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/execution_model_kernel_fixture.h

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "test_traits_common.h"
// Compile-time predicate over product families, passed as the trailing
// argument of the HWTEST2_F tests that exercise device-side enqueue.
struct DeviceEnqueueSupport {
    // Yields the deviceEnqueueSupport value that TestTraits declares for the
    // core family productFamily maps to.
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        constexpr auto coreFamily = NEO::ToGfxCoreFamily<productFamily>::get();
        return TestTraits<coreFamily>::deviceEnqueueSupport;
    }
};

View File

@ -19,7 +19,6 @@ if(TESTS_GEN12LP)
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_tests_gen12lp.cpp
@ -28,10 +27,8 @@ if(TESTS_GEN12LP)
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/profiling_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen12lp.inl
)

View File

@ -1,47 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/mocks/mock_device.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
using namespace NEO;
using namespace DeviceHostQueue;
// Expects getBlockKernelStartPointer to add the block kernel's skipSetFFIDGP
// entry-point offset only for the REVISION_A0 stepping with the flag set; in
// the three other combinations the plain GPU address is expected.
GEN12LPTEST_F(DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOffsetIsAddedToBlockKernelStartPointer) {
    auto clDevice = pContext->getDevice(0);
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*pContext));

    auto *blockKernelInfo = const_cast<KernelInfo *>(parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0));
    blockKernelInfo->createKernelAllocation(clDevice->getDevice(), false);
    ASSERT_NE(nullptr, blockKernelInfo->getGraphicsAllocation());
    blockKernelInfo->kernelDescriptor.entryPoints.skipSetFFIDGP = 0x1234;

    // The revision id is rewritten in place below, hence the const_cast.
    auto &hwInfo = const_cast<HardwareInfo &>(clDevice->getHardwareInfo());
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);

    // Forwards ccsFlag as the last argument (presumably "running on CCS", per the test name).
    auto queryStartPointer = [&](bool ccsFlag) -> uint64_t {
        return MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(clDevice->getDevice(), blockKernelInfo, ccsFlag);
    };

    // REVISION_A0, flag set: the skip offset is added.
    hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo);
    uint64_t expectedPointer = blockKernelInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockKernelInfo->kernelDescriptor.entryPoints.skipSetFFIDGP;
    uint64_t actualPointer = queryStartPointer(true);
    EXPECT_EQ(expectedPointer, actualPointer);

    // REVISION_A0, flag cleared: plain GPU address.
    expectedPointer = blockKernelInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    actualPointer = queryStartPointer(false);
    EXPECT_EQ(expectedPointer, actualPointer);

    // REVISION_A1: plain GPU address regardless of the flag.
    hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A1, hwInfo);
    expectedPointer = blockKernelInfo->getGraphicsAllocation()->getGpuAddressToPatch();
    actualPointer = queryStartPointer(true);
    EXPECT_EQ(expectedPointer, actualPointer);

    actualPointer = queryStartPointer(false);
    EXPECT_EQ(expectedPointer, actualPointer);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -17,9 +17,7 @@
#include "opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/sampler_tests_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/scheduler_source_tests_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/tbx_command_stream_receiver_tests_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/test_device_queue_hw_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/test_platform_caps_gen12lp.inl"
#include "opencl/test/unit_test/gen12lp/test_sample_gen12lp.inl"

View File

@ -1,32 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen12lp/hw_cmds.h"
#include "opencl/source/device_queue/device_queue_hw.h"
// Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one
#include "opencl/source/gen12lp/device_enqueue.h"
#include "opencl/source/gen12lp/scheduler_definitions.h"
#include "opencl/test/unit_test/scheduler/scheduler_source_tests.h"
// Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them
#include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl"
using namespace NEO;
typedef SchedulerSourceTest SchedulerSourceTestGen12;
// Gen12LP instance of the command-size check: the body only delegates to the
// test helper below, instantiated with this test's FamilyType. The helper is
// not visible here (presumably provided by the scheduler_source_tests headers
// included above).
GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) {
givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest<FamilyType>();
}
// Gen12LP instance of the SLB dummy-commands size check: the body only
// delegates to the test helper below, instantiated with this test's FamilyType.
GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) {
givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest<FamilyType>();
}
// Gen12LP instance of the enqueue-count check: the body only delegates to the
// test helper below, instantiated with this test's FamilyType.
GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) {
givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest<FamilyType>();
}

View File

@ -1,85 +0,0 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
using namespace NEO;
using namespace DeviceHostQueue;
typedef DeviceQueueHwTest Gen12LpDeviceQueueSlb;
// Creates a device queue and compares the size of its SLB buffer with the
// size computed step by step in the test body.
GEN12LPTEST_F(Gen12LpDeviceQueueSlb, WhenGettingSlbBufferSizeThenAllocationSizeIsCorrect) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    constexpr auto enqueueCount = 128; // num of enqueues
    constexpr auto extraPageCount = 4;

    deviceQueue = createQueueObject();
    ASSERT_NE(deviceQueue, nullptr);

    // Minimum SLB size per enqueue plus one MI_BATCH_BUFFER_START, page aligned.
    auto expectedSize = getMinimumSlbSize<FamilyType>();
    expectedSize *= enqueueCount;
    expectedSize += sizeof(MI_BATCH_BUFFER_START);
    expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);

    // Cleanup section and the additional pages, page aligned again.
    expectedSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
    expectedSize += (extraPageCount * MemoryConstants::pageSize);
    expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);

    ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr);
    EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize);

    delete deviceQueue;
}
// A freshly constructed Gen12LP mock device queue is expected to have all four
// workaround flags (arbCheck, pipeControl, miAtomic, lri) cleared.
GEN12LPTEST_F(Gen12LpDeviceQueueSlb, WhenApplyingSlbCommandsWaThenCorrectWaAreEnabled) {
    using MockQueue = MockDeviceQueueHw<FamilyType>;

    auto *queue = new MockQueue(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);

    EXPECT_FALSE(queue->arbCheckWa);
    EXPECT_FALSE(queue->pipeControlWa);
    EXPECT_FALSE(queue->miAtomicWa);
    EXPECT_FALSE(queue->lriWa);

    delete queue;
}
// Builds the SLB dummy commands and the execution-model cleanup section, parses
// the SLB stream starting at the cleanup section offset, and checks that the
// first MI_STORE_REGISTER_MEM found reads GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW
// with MMIO remap enabled and is not directly followed by another
// MI_STORE_REGISTER_MEM.
GEN12LPTEST_F(Gen12LpDeviceQueueSlb, givenDeviceCommandQueueWithProfilingWhenBatchBufferIsBuiltThenOneMiStoreRegisterMemWithMmioRemapEnableIsPresent) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    auto mockDeviceQueueHw = new MockDeviceQueueHw<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]);
    auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize();
    MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
    uint32_t taskCount = 7;
    auto hwTimeStamp = pCommandQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();

    mockDeviceQueueHw->buildSlbDummyCommands();
    mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, 0x123, taskCount);

    HardwareParse hwParser;
    auto *slbCS = mockDeviceQueueHw->getSlbCS();
    // Offset computed from the per-enqueue command size, the enqueue count and
    // one MI_BATCH_BUFFER_START, rounded up to a page; parsing starts there.
    size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize);
    size_t cleanupSectionOffsetToParse = cleanupSectionOffset;
    hwParser.parseCommands<FamilyType>(*slbCS, cleanupSectionOffsetToParse);
    hwParser.findHardwareCommands<FamilyType>();

    auto itorMiStore = find<MI_STORE_REGISTER_MEM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    ASSERT_NE(hwParser.cmdList.end(), itorMiStore);
    auto pMiStore = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorMiStore);
    ASSERT_NE(nullptr, pMiStore);
    EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pMiStore->getRegisterAddress());
    EXPECT_TRUE(pMiStore->getMmioRemapEnable());

    // Inspect the next command only when one exists: dereferencing
    // cmdList.end() is undefined behavior, and no following command trivially
    // means no second MI_STORE_REGISTER_MEM follows.
    ++itorMiStore;
    if (itorMiStore != hwParser.cmdList.end()) {
        pMiStore = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorMiStore);
        EXPECT_EQ(nullptr, pMiStore);
    }

    delete mockParentKernel;
    delete mockDeviceQueueHw;
}

View File

@ -14,10 +14,7 @@ if(TESTS_GEN8)
${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_dispatch_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen8.cpp

View File

@ -1,70 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "test.h"
#include "gtest/gtest.h"
using namespace NEO;
typedef Test<ExecutionModelSchedulerTest> BdwSchedulerTest;
// Dispatches the scheduler kernel into the command queue's stream and checks
// that every PIPE_CONTROL with command-streamer stall enabled also has DC
// flush enabled. Does nothing when OCL 2.1 features are not supported.
BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSStallIsAddedThenDCFlushEnabledIsSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (!pClDevice->areOcl21FeaturesSupported()) {
        return;
    }

    DeviceQueueHw<FamilyType> *deviceQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
    SchedulerKernel &schedulerKernel = context->getSchedulerKernel();
    size_t schedulerSshSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

    // Setup heaps in pCmdQ
    MultiDispatchInfo schedulerDispatchInfo(&schedulerKernel);
    LinearStream &cmdStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
                                                                                      false, false, false, schedulerDispatchInfo,
                                                                                      nullptr, 0, false, false);
    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, schedulerSshSize);

    GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
        cmdStream,
        *deviceQueueHw,
        pDevice->getPreemptionMode(),
        schedulerKernel,
        &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
        deviceQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
        false);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
    hwParser.findHardwareCommands<FamilyType>();
    ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);

    GenCmdList pipeControls = hwParser.getCommandsList<PIPE_CONTROL>();
    EXPECT_NE(0u, pipeControls.size());

    for (auto cmd : pipeControls) {
        auto *pipeControl = static_cast<PIPE_CONTROL *>(cmd);
        ASSERT_NE(nullptr, pipeControl);
        if (pipeControl->getCommandStreamerStallEnable()) {
            EXPECT_TRUE(pipeControl->getDcFlushEnable());
        }
    }
}

View File

@ -1,32 +0,0 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen8/hw_cmds.h"
#include "opencl/source/device_queue/device_queue_hw.h"
// Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one
#include "opencl/source/gen8/device_enqueue.h"
#include "opencl/source/gen8/scheduler_definitions.h"
#include "opencl/test/unit_test/scheduler/scheduler_source_tests.h"
// Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them
#include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl"
using namespace NEO;
typedef SchedulerSourceTest SchedulerSourceTestGen8;
// Gen8 instance of the command-size check: the body only delegates to the test
// helper below, instantiated with this test's FamilyType. The helper is not
// visible here (presumably provided by the scheduler_source_tests headers
// included above).
GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) {
givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest<FamilyType>();
}
// Gen8 instance of the SLB dummy-commands size check: the body only delegates
// to the test helper below, instantiated with this test's FamilyType.
GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) {
givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest<FamilyType>();
}
// Gen8 instance of the enqueue-count check: the body only delegates to the
// test helper below, instantiated with this test's FamilyType.
GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) {
givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest<FamilyType>();
}

View File

@ -82,3 +82,6 @@ GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckingImageSupportThenReturnTrue) {
GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckingMediaBlockSupportThenReturnTrue) {
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock);
}
// The Gen8 capability table must report device-side enqueue as unsupported.
GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckingDeviceEnqueueSupportThenReturnFalse) {
    const auto &capabilityTable = pDevice->getHardwareInfo().capabilityTable;
    EXPECT_FALSE(capabilityTable.supportsDeviceEnqueue);
}

View File

@ -1,81 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
using namespace NEO;
using namespace DeviceHostQueue;
typedef DeviceQueueHwTest Gen8DeviceQueueSlb;
// Creates a device queue and compares the size of its SLB buffer with the
// size computed step by step in the test body. On Gen8 each enqueue slot also
// accounts for one MI_ATOMIC and two MI_LOAD_REGISTER_IMM commands.
GEN8TEST_F(Gen8DeviceQueueSlb, WhenGettingSlbBufferSizeThenAllocationSizeIsCorrect) {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    constexpr auto enqueueCount = 128; // num of enqueues
    constexpr auto extraPageCount = 4;

    deviceQueue = createQueueObject();
    ASSERT_NE(deviceQueue, nullptr);

    // Per-enqueue slot size.
    auto expectedSize = getMinimumSlbSize<FamilyType>() +
                        sizeof(MI_ATOMIC) +
                        2 * sizeof(MI_LOAD_REGISTER_IMM);

    // All slots plus one MI_BATCH_BUFFER_START, page aligned.
    expectedSize *= enqueueCount;
    expectedSize += sizeof(MI_BATCH_BUFFER_START);
    expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);

    // Cleanup section and the additional pages, page aligned again.
    expectedSize += MockDeviceQueueHw<FamilyType>::getExecutionModelCleanupSectionSize();
    expectedSize += (extraPageCount * MemoryConstants::pageSize);
    expectedSize = alignUp(expectedSize, MemoryConstants::pageSize);

    ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr);
    EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize);

    delete deviceQueue;
}
// A freshly constructed Gen8 mock device queue is expected to have the
// miAtomic and lri workaround flags set and the arbCheck and pipeControl
// flags cleared.
GEN8TEST_F(Gen8DeviceQueueSlb, WhenApplyingSlbCommandsWaThenCorrectWaAreEnabled) {
    using MockQueue = MockDeviceQueueHw<FamilyType>;

    auto *queue = new MockQueue(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);

    EXPECT_FALSE(queue->arbCheckWa);
    EXPECT_FALSE(queue->pipeControlWa);
    EXPECT_TRUE(queue->miAtomicWa);
    EXPECT_TRUE(queue->lriWa);

    delete queue;
}
// Adds the profiling-end commands for a fixed timestamp address and checks
// that the first PIPE_CONTROL in the SLB stream uses the write-timestamp
// post-sync operation and carries the low and high halves of that address.
GEN8TEST_F(Gen8DeviceQueueSlb, WhenAddingProfilingEndCmdsThenPipeControlIsProgrammedCorrectly) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MockQueue = MockDeviceQueueHw<FamilyType>;

    auto *queue = new MockQueue(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);

    uint64_t timestampAddress = 0x12345678555500;
    const auto expectedAddressLow = static_cast<uint32_t>(timestampAddress & 0xFFFFFFFF);
    const auto expectedAddressHigh = static_cast<uint32_t>(timestampAddress >> 32);

    queue->addProfilingEndCmds(timestampAddress);

    HardwareParse hwParser;
    auto *slbStream = queue->getSlbCS();
    hwParser.parseCommands<FamilyType>(*slbStream, 0);

    auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    ASSERT_NE(hwParser.cmdList.end(), pipeControlItor);
    auto *pipeControl = static_cast<PIPE_CONTROL *>(*pipeControlItor);

    const auto expectedPostSyncOp = static_cast<uint32_t>(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
    EXPECT_EQ(expectedPostSyncOp, static_cast<uint32_t>(pipeControl->getPostSyncOperation()));
    EXPECT_EQ(expectedAddressLow, pipeControl->getAddress());
    EXPECT_EQ(expectedAddressHigh, pipeControl->getAddressHigh());

    delete queue;
}

View File

@ -35,6 +35,7 @@
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/program/create.inl"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
@ -1070,7 +1071,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenK
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWCMDTEST_F(IGFX_GEN8_CORE, GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithDeviceEnqueueIsUsedThenKernelCreateAndSubmitCallbacksAreNotCalled) {
HWTEST2_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithDeviceEnqueueIsUsedThenKernelCreateAndSubmitCallbacksAreNotCalled, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pDevice);
gtpinCallbacks.onContextCreate = OnContextCreate;

View File

@ -14,6 +14,7 @@
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/pipe.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
@ -423,11 +424,9 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
EXPECT_EQ(3, sampler->getRefInternalCount());
}
HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect) {
HWTEST2_F(CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect, DeviceEnqueueSupport) {
pKernelInfo->addArgDevQueue(0, 0x20, sizeof(void *));
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device1);
cl_queue_properties queueProps[5] = {
CL_QUEUE_PROPERTIES,
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,

View File

@ -7,6 +7,7 @@
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
@ -64,11 +65,11 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
std::unique_ptr<MockKernelInfo> pKernelInfo;
};
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenKernelWithDevQueueArgWhenSettingArgHandleThenCorrectHandleIsSet) {
// Checks that the fixture kernel's handler for argument 0 is Kernel::setArgDevQueue.
HWTEST2_F(KernelArgDevQueueTest, GivenKernelWithDevQueueArgWhenSettingArgHandleThenCorrectHandleIsSet, DeviceEnqueueSupport) {
EXPECT_EQ(pKernel->kernelArgHandlers[0], &Kernel::setArgDevQueue);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenDeviceQueueWhenSettingArgDevQueueThenCorrectlyPatched) {
HWTEST2_F(KernelArgDevQueueTest, GivenDeviceQueueWhenSettingArgDevQueueThenCorrectlyPatched, DeviceEnqueueSupport) {
auto clDeviceQueue = static_cast<cl_command_queue>(pDeviceQueue);
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clDeviceQueue);
@ -79,7 +80,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenDeviceQueueWhenSettingAr
EXPECT_EQ(*(reinterpret_cast<uint32_t *>(patchLocation)), gpuAddress);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenCommandQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned) {
HWTEST2_F(KernelArgDevQueueTest, GivenCommandQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
auto clCmdQueue = static_cast<cl_command_queue>(pCommandQueue);
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clCmdQueue);
@ -87,7 +88,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenCommandQueueWhenSettingA
EXPECT_EQ(crossThreadDataUnchanged(), true);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenNonQueueObjectWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned) {
HWTEST2_F(KernelArgDevQueueTest, GivenNonQueueObjectWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
Buffer *buffer = new MockBuffer();
auto clBuffer = static_cast<cl_mem>(buffer);
@ -98,7 +99,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenNonQueueObjectWhenSettin
delete buffer;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenInvalidQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned) {
HWTEST2_F(KernelArgDevQueueTest, GivenInvalidQueueWhenSettingArgDevQueueThenInvalidDeviceQueueErrorIsReturned, DeviceEnqueueSupport) {
char *pFakeDeviceQueue = new char[sizeof(DeviceQueue)];
auto clFakeDeviceQueue = reinterpret_cast<cl_command_queue *>(pFakeDeviceQueue);
@ -109,13 +110,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenInvalidQueueWhenSettingA
delete[] pFakeDeviceQueue;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenNullDeviceQueueWhenSettingArgDevQueueThenInvalidArgValueErrorIsReturned) {
// Passing a null argument value to setArgDevQueue must be rejected with
// CL_INVALID_ARG_VALUE and must leave the cross-thread data untouched.
HWTEST2_F(KernelArgDevQueueTest, GivenNullDeviceQueueWhenSettingArgDevQueueThenInvalidArgValueErrorIsReturned, DeviceEnqueueSupport) {
    const auto status = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), nullptr);

    EXPECT_EQ(status, CL_INVALID_ARG_VALUE);
    EXPECT_EQ(crossThreadDataUnchanged(), true);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenInvalidSizeWhenSettingArgDevQueueThenInvalidArgSizeErrorIsReturned) {
HWTEST2_F(KernelArgDevQueueTest, GivenInvalidSizeWhenSettingArgDevQueueThenInvalidArgSizeErrorIsReturned, DeviceEnqueueSupport) {
auto clDeviceQueue = static_cast<cl_command_queue>(pDeviceQueue);
auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue) - 1, &clDeviceQueue);

View File

@ -32,6 +32,7 @@
#include "opencl/source/mem_obj/image.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/helpers/gtest_helpers.h"
@ -1007,7 +1008,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface) {
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -1041,7 +1042,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed) {
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -1075,8 +1076,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched) {
HWTEST2_F(KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
@ -1097,7 +1097,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched) {
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
@ -1116,7 +1116,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched) {
HWTEST2_F(KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
@ -1137,7 +1137,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface) {
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -1170,7 +1170,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed) {
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -1205,7 +1205,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched) {
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -1223,7 +1223,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched) {
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
@ -1244,7 +1244,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith
delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched) {
HWTEST2_F(KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched, DeviceEnqueueSupport) {
// define kernel info
auto pKernelInfo = std::make_unique<MockKernelInfo>();
@ -3089,7 +3089,7 @@ class DeviceQueueHwMock : public DeviceQueueHw<GfxFamily> {
};
} // namespace NEO
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue) {
HWTEST2_F(DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue, DeviceEnqueueSupport) {
std::unique_ptr<DeviceQueueHwMock<FamilyType>> mockDeviceQueueHw(new DeviceQueueHwMock<FamilyType>(pContext, device, deviceQueueProperties::minimumProperties[0]));
auto slb = mockDeviceQueueHw->getSlbBuffer();

View File

@ -27,6 +27,7 @@ add_executable(igdrcl_mt_tests EXCLUDE_FROM_ALL
target_include_directories(igdrcl_mt_tests PRIVATE
${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests
${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX}
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/includes${BRANCH_DIR_SUFFIX}
${NEO_SOURCE_DIR}/opencl/source/gen_common
)

View File

@ -7,6 +7,7 @@
#include "shared/test/common/mocks/mock_device.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
@ -17,7 +18,7 @@ using namespace NEO;
typedef ::testing::Test DeviceQueueHwMtTest;
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwMtTest, givenTakenIgilCriticalSectionWhenSecondThreadIsWaitingThenDontHang) {
HWTEST2_F(DeviceQueueHwMtTest, givenTakenIgilCriticalSectionWhenSecondThreadIsWaitingThenDontHang, DeviceEnqueueSupport) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

View File

@ -11,6 +11,7 @@
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_queue_matcher.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "test.h"
@ -24,7 +25,7 @@
using namespace NEO;
using namespace BuiltinKernelsSimulation;
HWCMDTEST_F(IGFX_GEN8_CORE, SchedulerSourceTest, WhenEnqueingThenGpgpuWalkerIsPatchedCorrectly) {
HWTEST2_F(SchedulerSourceTest, WhenEnqueingThenGpgpuWalkerIsPatchedCorrectly, DeviceEnqueueSupport) {
using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

View File

@ -75,7 +75,7 @@ const RuntimeCapabilityTable BDW::capabilityTable{
false, // supportsVme
false, // supportCacheFlushAfterWalker
true, // supportsImages
true, // supportsDeviceEnqueue
false, // supportsDeviceEnqueue
true, // supportsPipes
true, // supportsOcl21Features
false, // supportsOnDemandPageFaults

View File

@ -15,4 +15,5 @@ struct TestTraits<IGFX_GEN11_CORE> {
static constexpr bool iohInSbaSupported = true;
static constexpr bool auxTranslationSupported = false;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = true;
};

View File

@ -15,4 +15,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
static constexpr bool iohInSbaSupported = true;
static constexpr bool auxTranslationSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = false;
};

View File

@ -14,4 +14,5 @@ struct TestTraits<IGFX_GEN8_CORE> {
static constexpr bool programOnlyChangedFieldsInComputeStateMode = true;
static constexpr bool iohInSbaSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = false;
};

View File

@ -14,4 +14,5 @@ struct TestTraits<IGFX_GEN9_CORE> {
static constexpr bool programOnlyChangedFieldsInComputeStateMode = true;
static constexpr bool iohInSbaSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = true;
};

View File

@ -22,4 +22,5 @@ struct TestTraits<IGFX_XE_HP_CORE> {
static constexpr bool iohInSbaSupported = false;
static constexpr bool auxTranslationSupported = true;
static constexpr bool isUsingNonDefaultIoctls = true;
static constexpr bool deviceEnqueueSupport = false;
};