Add cache flush command after WALKER command

Change-Id: I3983dc6c0797047e17cc8189655a22a22e85892b
This commit is contained in:
Zdanowicz, Zbigniew 2018-12-06 15:33:02 +01:00 committed by sys_ocldev
parent 9e81469d9f
commit 3dca095ccf
46 changed files with 1008 additions and 408 deletions

View File

@ -3405,7 +3405,8 @@ void *CL_API_CALL clSVMAlloc(cl_context context,
return pAlloc;
}
pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(size, !!(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER));
pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(size, !!(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER),
SVMAllocsManager::memFlagIsReadOnly(flags));
if (pContext->isProvidingPerformanceHints()) {
pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, pAlloc, size);

View File

@ -418,7 +418,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool res
template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(pKernel) +
sizeof(PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(commandQueue.getDevice());
if (reserveProfilingCmdsSpace) {

View File

@ -191,6 +191,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
// Program the walker. Invokes execution so all state should already be programmed
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel);
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);

View File

@ -38,33 +38,36 @@ const PLATFORM CNL::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable CNL::capabilityTable{0,
83.333,
21,
true,
true,
true,
true,
true, // ftrSupportsVmeAvcTextureSampler
true, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, true},
&isSimulationCNL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
true,
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Cnl,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable CNL::capabilityTable{
{0, 0, 0, false, false, false}, // kmdNotifyProperties
{true, true}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
83.333, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationCNL, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
21, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Cnl, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
true, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
true, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
true, // ftr64KBpages
true, // instrumentationEnabled
true, // forceStatelessCompilationFor32Bit
true, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo CNL_2x5x8::hwInfo = {
&CNL::platform,

View File

@ -43,7 +43,7 @@ size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
typedef typename BDWFamily::MI_MATH MI_MATH;
typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
size_t n = 0;
if ((pKernel != nullptr) && pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
n += sizeof(PIPE_CONTROL) +
(2 * sizeof(MI_LOAD_REGISTER_REG) +
sizeof(MI_LOAD_REGISTER_IMM) +

View File

@ -39,33 +39,36 @@ const PLATFORM BDW::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable BDW::capabilityTable{0,
80,
21,
true,
true,
true,
true,
false, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::Disabled,
{false, false},
&isSimulationBDW,
true,
true, // forceStatelessCompilationFor32Bit
{true, 50000, true, 5000, true, 200000}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
true, // isCore
false, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Bdw,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable BDW::capabilityTable{
{50000, 5000, 200000, true, true, true}, // kmdNotifyProperties
{false, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
80, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationBDW, // isSimulation
PreemptionMode::Disabled, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
21, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Bdw, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
true, // ftrSvm
true, // ftrSupportsCoherency
false, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
false, // ftr64KBpages
true, // instrumentationEnabled
true, // forceStatelessCompilationFor32Bit
true, // isCore
false, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo BDW_1x2x6::hwInfo = {
&BDW::platform,

View File

@ -43,7 +43,7 @@ size_t GpgpuWalkerHelper<SKLFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
typedef typename SKLFamily::MI_MATH MI_MATH;
typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
size_t n = 0;
if ((pKernel != nullptr) && pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
n += sizeof(PIPE_CONTROL) +
(2 * sizeof(MI_LOAD_REGISTER_REG) +
sizeof(MI_LOAD_REGISTER_IMM) +

View File

@ -36,33 +36,36 @@ const PLATFORM BXT::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable BXT::capabilityTable{0,
52.083,
12,
true,
true,
false, // ftrSvm
true,
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, false},
&isSimulationBXT,
true,
false, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
false, // isCore
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Bxt,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable BXT::capabilityTable{
{0, 0, 0, false, false, false}, // kmdNotifyProperties
{true, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
52.083, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationBXT, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
12, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Bxt, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
false, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
false, // ftr64KBpages
true, // instrumentationEnabled
false, // forceStatelessCompilationFor32Bit
false, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo BXT_1x2x6::hwInfo = {
&BXT::platform,

View File

@ -31,33 +31,36 @@ const PLATFORM CFL::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable CFL::capabilityTable{0,
83.333,
21,
true,
true,
true,
true,
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, false},
&isSimulationCFL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
true, // isCore
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Cfl,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable CFL::capabilityTable{
{0, 0, 0, false, false, false}, // kmdNotifyProperties
{true, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
83.333, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationCFL, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
21, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Cfl, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
true, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
true, // ftr64KBpages
true, // instrumentationEnabled
true, // forceStatelessCompilationFor32Bit
true, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo CFL_1x2x6::hwInfo = {
&CFL::platform,

View File

@ -31,33 +31,36 @@ const PLATFORM GLK::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable GLK::capabilityTable{0,
52.083,
12,
true,
true,
false, // ftrSvm
true,
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, false},
&isSimulationGLK,
true,
false, // forceStatelessCompilationFor32Bit
{true, 30000, false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
false, // isCore
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Glk,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable GLK::capabilityTable{
{30000, 0, 0, true, false, false}, // kmdNotifyProperties
{true, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
52.083, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationGLK, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
12, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Glk, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
false, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
false, // ftr64KBpages
true, // instrumentationEnabled
false, // forceStatelessCompilationFor32Bit
false, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo GLK_1x3x6::hwInfo = {
&GLK::platform,

View File

@ -31,33 +31,36 @@ const PLATFORM KBL::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable KBL::capabilityTable{0,
83.333,
21,
true,
true,
true,
true,
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, false},
&isSimulationKBL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
true, // isCore
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Kbl,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable KBL::capabilityTable{
{0, 0, 0, false, false, false}, // kmdNotifyProperties
{true, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
83.333, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationKBL, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
21, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Kbl, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
true, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
true, // ftr64KBpages
true, // instrumentationEnabled
true, // forceStatelessCompilationFor32Bit
true, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo KBL_1x2x6::hwInfo = {
&KBL::platform,

View File

@ -39,33 +39,36 @@ const PLATFORM SKL::platform = {
0, // usRevId_PCH
GTTYPE_UNDEFINED};
const RuntimeCapabilityTable SKL::capabilityTable{0,
83.333,
21,
true,
true,
true,
true,
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
PreemptionMode::MidThread,
{true, false},
&isSimulationSKL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
true, // isCore
true, // sourceLevelDebuggerSupported
CmdServicesMemTraceVersion::DeviceValues::Skl,
0, // extraQuantityThreadsPerEU
true, // SupportsVme
64, // slmSize
MemoryConstants::max48BitAddress}; // gpuAddressSpace
const RuntimeCapabilityTable SKL::capabilityTable{
{0, 0, 0, false, false, false}, // kmdNotifyProperties
{true, false}, // whitelistedRegisters
MemoryConstants::max48BitAddress, // gpuAddressSpace
83.333, // defaultProfilingTimerResolution
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
&isSimulationSKL, // isSimulation
PreemptionMode::MidThread, // defaultPreemptionMode
EngineType::ENGINE_RCS, // defaultEngineType
0, // maxRenderFrequency
21, // clVersionSupport
CmdServicesMemTraceVersion::DeviceValues::Skl, // aubDeviceId
0, // extraQuantityThreadsPerEU
64, // slmSize
true, // ftrSupportsFP64
true, // ftrSupports64BitMath
true, // ftrSvm
true, // ftrSupportsCoherency
true, // ftrSupportsVmeAvcTextureSampler
false, // ftrSupportsVmeAvcPreemption
false, // ftrRenderCompressedBuffers
false, // ftrRenderCompressedImages
true, // ftr64KBpages
true, // instrumentationEnabled
true, // forceStatelessCompilationFor32Bit
true, // isCore
true, // sourceLevelDebuggerSupported
true, // supportsVme
false // supportCacheFlushAfterWalker
};
const HardwareInfo SKL_1x2x6::hwInfo = {
&SKL::platform,

View File

@ -32,10 +32,19 @@ struct WhitelistedRegisters {
};
struct RuntimeCapabilityTable {
uint32_t maxRenderFrequency;
KmdNotifyProperties kmdNotifyProperties;
WhitelistedRegisters whitelistedRegisters;
uint64_t gpuAddressSpace;
double defaultProfilingTimerResolution;
size_t requiredPreemptionSurfaceSize;
bool (*isSimulation)(unsigned short);
PreemptionMode defaultPreemptionMode;
EngineType defaultEngineType;
uint32_t maxRenderFrequency;
unsigned int clVersionSupport;
uint32_t aubDeviceId;
uint32_t extraQuantityThreadsPerEU;
uint32_t slmSize;
bool ftrSupportsFP64;
bool ftrSupports64BitMath;
bool ftrSvm;
@ -44,29 +53,13 @@ struct RuntimeCapabilityTable {
bool ftrSupportsVmeAvcPreemption;
bool ftrRenderCompressedBuffers;
bool ftrRenderCompressedImages;
PreemptionMode defaultPreemptionMode;
WhitelistedRegisters whitelistedRegisters;
bool (*isSimulation)(unsigned short);
bool instrumentationEnabled;
bool forceStatelessCompilationFor32Bit;
KmdNotifyProperties kmdNotifyProperties;
bool ftr64KBpages;
EngineType defaultEngineType;
size_t requiredPreemptionSurfaceSize;
bool instrumentationEnabled;
bool forceStatelessCompilationFor32Bit;
bool isCore;
bool sourceLevelDebuggerSupported;
uint32_t aubDeviceId;
uint32_t extraQuantityThreadsPerEU;
bool supportsVme;
uint32_t slmSize;
uint64_t gpuAddressSpace;
bool supportCacheFlushAfterWalker;
};
struct HardwareCapabilities {

View File

@ -142,7 +142,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
const bool &kernelUsesLocalIds,
Kernel &kernel);
static size_t getSizeRequiredCS();
static size_t getSizeRequiredCS(const Kernel *kernel);
static bool isPipeControlWArequired();
static size_t getSizeRequiredDSH(
const Kernel &kernel);
@ -202,6 +202,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel);
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);

View File

@ -7,6 +7,7 @@
#pragma once
#include "runtime/helpers/kernel_commands.h"
#include "runtime/kernel/kernel.h"
namespace OCLRT {
@ -43,9 +44,13 @@ uint32_t KernelCommandsHelper<GfxFamily>::additionalSizeRequiredDsh() {
}
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() {
return 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD);
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(const Kernel *kernel) {
size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD);
if (kernel->requiresCacheFlushCommand()) {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
return size;
}
template <typename GfxFamily>
@ -155,4 +160,14 @@ bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32
return true;
}
// Appends a PIPE_CONTROL with command-streamer stall and DC flush enabled to
// commandStream, but only when kernel->requiresCacheFlushCommand() reports that
// a flush is needed. Otherwise the stream is left untouched.
template <typename GfxFamily>
void KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel) {
    if (!kernel->requiresCacheFlushCommand()) {
        return;
    }

    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    // Reserve room for exactly one command and start from the family's default template.
    auto flushCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
    *flushCmd = GfxFamily::cmdInitPipeControl;
    flushCmd->setCommandStreamerStallEnable(true);
    flushCmd->setDcFlushEnable(true);
}
} // namespace OCLRT

View File

@ -14,15 +14,15 @@
namespace OCLRT {
struct KmdNotifyProperties {
int64_t delayKmdNotifyMicroseconds;
int64_t delayQuickKmdSleepMicroseconds;
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
// Main switch for KMD Notify optimization - if its disabled, all below are disabled too
bool enableKmdNotify;
int64_t delayKmdNotifyMicroseconds;
// Use smaller delay in specific situations (ie. from AsyncEventsHandler)
bool enableQuickKmdSleep;
int64_t delayQuickKmdSleepMicroseconds;
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
bool enableQuickKmdSleepForSporadicWaits;
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
};
namespace KmdNotifyConstants {

View File

@ -309,6 +309,7 @@ cl_int Kernel::initialize() {
kernelArguments.resize(numArgs);
slmSizes.resize(numArgs);
kernelArgHandlers.resize(numArgs);
kernelArgRequiresCacheFlush.resize(numArgs);
for (uint32_t i = 0; i < numArgs; ++i) {
storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0);
@ -849,6 +850,8 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
patchedArgumentsNum++;
kernelArguments[argIndex].isPatched = true;
}
addAllocationToCacheFlushVector(argIndex, svmAlloc);
return CL_SUCCESS;
}
@ -884,6 +887,9 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
patchedArgumentsNum++;
kernelArguments[argIndex].isPatched = true;
}
addAllocationToCacheFlushVector(argIndex, svmAlloc);
return CL_SUCCESS;
}
@ -908,10 +914,14 @@ const Kernel::SimpleKernelArgInfo &Kernel::getKernelArgInfo(uint32_t argIndex) c
void Kernel::setKernelExecInfo(GraphicsAllocation *argValue) {
kernelSvmGfxAllocations.push_back(argValue);
if (allocationForCacheFlush(argValue)) {
svmAllocationsRequireCacheFlush = true;
}
}
void Kernel::clearKernelExecInfo() {
kernelSvmGfxAllocations.clear();
svmAllocationsRequireCacheFlush = false;
}
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
@ -1119,7 +1129,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode);
}
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
return CL_SUCCESS;
} else {
@ -1243,7 +1253,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
patch<uint32_t, cl_channel_order>(imageFormat.image_channel_order, crossThreadData, kernelArgInfo.offsetChannelOrder);
patch<uint32_t, uint32_t>(kernelArgInfo.offsetHeap, crossThreadData, kernelArgInfo.offsetObjectId);
patch<uint32_t, cl_uint>(imageDesc.num_mip_levels, crossThreadData, kernelArgInfo.offsetNumMipLevels);
addAllocationToCacheFlushVector(argIndex, pImage->getGraphicsAllocation());
retVal = CL_SUCCESS;
}
@ -2122,4 +2132,51 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
}
}
}
// Tells whether a cache flush after the walker is enabled for this kernel's device.
// The EnableCacheFlushAfterWalker debug flag takes precedence: 1 forces the
// feature on, 0 forces it off. Any other value defers to the device's
// capabilityTable.supportCacheFlushAfterWalker entry.
bool Kernel::platformSupportCacheFlushAfterWalker() const {
    const int32_t forcedSetting = DebugManager.flags.EnableCacheFlushAfterWalker.get();
    switch (forcedSetting) {
    case 1:
        return true;
    case 0:
        return false;
    default:
        return device.getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker;
    }
}
// Returns true when a cache flush command is needed for this kernel.
// That requires platformSupportCacheFlushAfterWalker() to be true and at least
// one of the following to hold:
//  - the program has a global surface,
//  - svmAllocationsRequireCacheFlush is set,
//  - any slot of kernelArgRequiresCacheFlush holds a non-null allocation.
bool Kernel::requiresCacheFlushCommand() const {
    if (!platformSupportCacheFlushAfterWalker()) {
        return false;
    }

    if (getProgram()->getGlobalSurface() != nullptr || svmAllocationsRequireCacheFlush) {
        return true;
    }

    // A non-null entry marks an argument whose allocation was recorded as needing a flush.
    for (const auto *recordedAllocation : kernelArgRequiresCacheFlush) {
        if (recordedAllocation != nullptr) {
            return true;
        }
    }
    return false;
}
// Returns true when the allocation either has flushL3Required set or reports
// isMemObjectsAllocationWithWritableFlags(). argAllocation is dereferenced
// unconditionally, so callers must pass a non-null pointer.
bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) {
    return argAllocation->flushL3Required || argAllocation->isMemObjectsAllocationWithWritableFlags();
}
// Updates the cache-flush slot for kernel argument argIndex.
// The slot stores argAllocation when it is non-null and allocationForCacheFlush()
// accepts it; in every other case the slot is reset to nullptr.
// argIndex is used to index kernelArgRequiresCacheFlush without a bounds check.
void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation) {
    const bool needsFlush = (argAllocation != nullptr) && allocationForCacheFlush(argAllocation);
    kernelArgRequiresCacheFlush[argIndex] = needsFlush ? argAllocation : nullptr;
}
} // namespace OCLRT

View File

@ -374,6 +374,8 @@ class Kernel : public BaseObject<_cl_kernel> {
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &buffersForAuxTranslation);
bool requiresCacheFlushCommand() const;
protected:
struct ObjectCounts {
uint32_t imageCount;
@ -461,6 +463,9 @@ class Kernel : public BaseObject<_cl_kernel> {
void reconfigureKernel();
bool platformSupportCacheFlushAfterWalker() const;
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
bool allocationForCacheFlush(GraphicsAllocation *argAllocation);
Program *program;
Context *context;
const Device &device;
@ -493,5 +498,7 @@ class Kernel : public BaseObject<_cl_kernel> {
std::unique_ptr<ImageTransformer> imageTransformer;
bool specialPipelineSelectMode = false;
bool svmAllocationsRequireCacheFlush = false;
std::vector<GraphicsAllocation *> kernelArgRequiresCacheFlush;
};
} // namespace OCLRT

View File

@ -1,23 +1,8 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (C) 2017-2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
* SPDX-License-Identifier: MIT
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/memory_manager/memory_manager.h"
@ -64,7 +49,7 @@ GraphicsAllocation *SVMAllocsManager::MapBasedAllocationTracker::get(const void
SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager) : memoryManager(memoryManager) {
}
void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent) {
void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent, bool readOnly) {
if (size == 0)
return nullptr;
@ -73,6 +58,7 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent) {
if (!GA) {
return nullptr;
}
GA->setMemObjectsAllocationWithWritableFlags(!readOnly);
this->SVMAllocs.insert(*GA);
return GA->getUnderlyingBuffer();
@ -91,4 +77,8 @@ void SVMAllocsManager::freeSVMAlloc(void *ptr) {
memoryManager->freeGraphicsMemory(GA);
}
}
// Returns true when flags contains any of CL_MEM_READ_ONLY,
// CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.
// NOTE(review): the two HOST_* flags describe host access rather than device
// access - confirm that treating them as "read only" here is intended.
bool SVMAllocsManager::memFlagIsReadOnly(cl_svm_mem_flags flags) {
    const cl_svm_mem_flags readOnlyMask = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    return (flags & readOnlyMask) != 0;
}
} // namespace OCLRT

View File

@ -1,29 +1,15 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (C) 2017-2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
* SPDX-License-Identifier: MIT
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstdint>
#include <map>
#include <mutex>
#include "CL/cl.h"
namespace OCLRT {
class Device;
@ -45,10 +31,11 @@ class SVMAllocsManager {
};
SVMAllocsManager(MemoryManager *memoryManager);
void *createSVMAlloc(size_t size, bool coherent = false);
void *createSVMAlloc(size_t size, bool coherent, bool readOnly);
GraphicsAllocation *getSVMAlloc(const void *ptr);
void freeSVMAlloc(void *ptr);
size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); }
static bool memFlagIsReadOnly(cl_svm_mem_flags flags);
protected:
MapBasedAllocationTracker SVMAllocs;

View File

@ -91,6 +91,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algor
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache")
DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")

View File

@ -173,6 +173,34 @@ TEST_F(clSetKernelExecInfoTests, success_SvmPtrListWithMultiplePointers) {
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(3u, pMockKernel->getKernelSvmGfxAllocations().size());
EXPECT_TRUE(pMockKernel->svmAllocationsRequireCacheFlush);
clSVMFree(pContext, ptrSvm1);
clSVMFree(pContext, ptrSvm2);
}
}
TEST_F(clSetKernelExecInfoTests, givenReadOnlySvmPtrListWhenUsedAsKernelPointersThenNoCacheFlushRequire) {
if (svmCapabilities != 0) {
void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4);
EXPECT_NE(nullptr, ptrSvm1);
void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4);
EXPECT_NE(nullptr, ptrSvm2);
void *pSvmPtrList[] = {ptrSvm1, ptrSvm2};
size_t SvmPtrListSizeInBytes = 2 * sizeof(void *);
retVal = clSetKernelExecInfo(
pMockKernel, // cl_kernel kernel
CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name
SvmPtrListSizeInBytes, // size_t param_value_size
pSvmPtrList // const void *param_value
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(2u, pMockKernel->getKernelSvmGfxAllocations().size());
EXPECT_FALSE(pMockKernel->svmAllocationsRequireCacheFlush);
clSVMFree(pContext, ptrSvm1);
clSVMFree(pContext, ptrSvm2);

View File

@ -78,11 +78,11 @@ struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, pu
std::unique_ptr<MockProgram> program;
SKernelBinaryHeaderCommon kernelHeader;
SPatchDataParameterStream dataParameterStream;
SPatchExecutionEnvironment executionEnvironment;
SPatchThreadPayload threadPayload;
SPatchSamplerStateArray samplerArray;
SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchExecutionEnvironment executionEnvironment = {};
SPatchThreadPayload threadPayload = {};
SPatchSamplerStateArray samplerArray = {};
KernelInfo kernelInfo;
KernelInfo kernelInfoWithSampler;
@ -111,7 +111,7 @@ HWTEST_F(DispatchWalkerTest, shouldntChangeCommandStreamMemory) {
// Consume all memory except what is needed for this enqueue
auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
//cs has a minimum required size
auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;
@ -160,7 +160,7 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) {
// Consume all memory except what is needed for this enqueue
auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
//cs has a minimum required size
auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;

View File

@ -10,7 +10,9 @@
#include "runtime/memory_manager/allocations_list.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_command_queue.h"
@ -843,3 +845,30 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTrans
EXPECT_EQ(1u, cmdQ.waitCalled);
}
}
// With EnableCacheFlushAfterWalker forced to 1 and the kernel's SVM allocations flagged as
// requiring a cache flush, the command directly following GPGPU_WALKER in the queue's
// command stream must be a PIPE_CONTROL with CS stall and DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    MockKernelWithInternals mockKernel(*pDevice, context);
    CommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);

    // One-dimensional dispatch: only gws[0] is consumed because work_dim is 1.
    size_t gws[3] = {1, 0, 0};
    mockKernel.mockKernel->svmAllocationsRequireCacheFlush = true;
    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(cmdQ.getCS(0), 0);
    auto itorCmd = find<GPGPU_WALKER *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
    ASSERT_NE(hwParse.cmdList.end(), itorCmd);

    // Step to the command right after the walker. Fail the test instead of dereferencing
    // end() if the walker turned out to be the last parsed command.
    ++itorCmd;
    ASSERT_NE(hwParse.cmdList.end(), itorCmd);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*itorCmd);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getDcFlushEnable());
}

View File

@ -25,9 +25,9 @@ struct EnqueueSvmMemCopyTest : public DeviceFixture,
void SetUp() override {
DeviceFixture::SetUp();
CommandQueueFixture::SetUp(pDevice, 0);
srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
ASSERT_NE(nullptr, srcSvmPtr);
dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
ASSERT_NE(nullptr, dstSvmPtr);
srcSvmAlloc = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr);
ASSERT_NE(nullptr, srcSvmAlloc);

View File

@ -27,7 +27,7 @@ struct EnqueueSvmMemFillTest : public DeviceFixture,
CommandQueueFixture::SetUp(pDevice, 0);
patternSize = (size_t)GetParam();
ASSERT_TRUE((0 < patternSize) && (patternSize <= 128));
svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, true);
svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
ASSERT_NE(nullptr, svmPtr);
svmAlloc = context->getSVMAllocsManager()->getSVMAlloc(svmPtr);
ASSERT_NE(nullptr, svmAlloc);

View File

@ -33,7 +33,7 @@ struct EnqueueSvmTest : public DeviceFixture,
void SetUp() override {
DeviceFixture::SetUp();
CommandQueueFixture::SetUp(pDevice, 0);
ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
}
void TearDown() override {
@ -238,7 +238,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueDstPtrIsNull) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
void *pDstSVM = nullptr;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
@ -269,7 +269,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueSrcPtrIsNull) {
TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
@ -285,7 +285,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlocking_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
retVal = this->pCmdQ->enqueueSVMMemcpy(
true, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
@ -301,7 +301,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlocking_Success) {
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlockedOnEvent_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
UserEvent uEvent;
cl_event eventWaitList[] = {&uEvent};
retVal = this->pCmdQ->enqueueSVMMemcpy(
@ -319,7 +319,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlockedOnEvent_Success) {
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherent_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
@ -335,7 +335,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherent_Success) {
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherentBlockedOnEvent_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true);
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
UserEvent uEvent;
cl_event eventWaitList[] = {&uEvent};
retVal = this->pCmdQ->enqueueSVMMemcpy(
@ -522,7 +522,7 @@ TEST_F(EnqueueSvmTest, concurentMapAccess) {
auto allocSvm = [&](uint32_t from, uint32_t to) {
for (uint32_t i = from; i <= to; i++) {
svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1);
svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1, false, false);
auto ga = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]);
EXPECT_NE(nullptr, ga);
EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]);

View File

@ -757,8 +757,8 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueFillImageWhenZeroSizeEnqueueIsDetect
HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetectedThenCommandMarkerShouldBeEnqueued) {
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
size_t zeroSize = 0;
mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, nullptr);
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
@ -771,8 +771,8 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetect
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
cl_event event;
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
size_t zeroSize = 0;
mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, &event);
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
@ -793,7 +793,7 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetect
HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemFillWhenZeroSizeEnqueueIsDetectedThenCommandMarkerShouldBeEnqueued) {
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
const float pattern[1] = {1.2345f};
size_t zeroSize = 0;
mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, nullptr);
@ -806,7 +806,7 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemFillWhenZeroSizeEnqueueIsDetec
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
cl_event event;
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
const float pattern[1] = {1.2345f};
size_t zeroSize = 0;
mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, &event);

View File

@ -648,7 +648,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCalling
TEST_F(PerformanceHintEnqueueTest, GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) {
void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmPtr, 256, 0, nullptr, nullptr);

View File

@ -10,7 +10,8 @@
namespace OCLRT {
void DeviceFixture::SetUp() {
SetUpImpl(nullptr);
hwInfoHelper = *platformDevices[0];
SetUpImpl(&hwInfoHelper);
}
void DeviceFixture::SetUpImpl(const OCLRT::HardwareInfo *hardwareInfo) {

View File

@ -46,3 +46,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) {
// Gen8 hardware info is expected to report an slmSize of 64 in its capability table.
GEN8TEST_F(Gen8DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) {
    const auto &capabilityTable = pDevice->getHardwareInfo().capabilityTable;
    EXPECT_EQ(64u, capabilityTable.slmSize);
}
// Gen8 capability table must not advertise support for the cache flush after WALKER.
GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckSupportCacheFlushAfterWalkerThenFalse) {
    const auto &capabilityTable = pDevice->getHardwareInfo().capabilityTable;
    EXPECT_FALSE(capabilityTable.supportCacheFlushAfterWalker);
}

View File

@ -59,3 +59,7 @@ GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThe
// Gen9 hardware info is expected to report an slmSize of 64 in its capability table.
GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) {
    const auto &capabilityTable = pDevice->getHardwareInfo().capabilityTable;
    EXPECT_EQ(64u, capabilityTable.slmSize);
}
// Gen9 capability table must not advertise support for the cache flush after WALKER.
GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckSupportCacheFlushAfterWalkerThenFalse) {
    const auto &capabilityTable = pDevice->getHardwareInfo().capabilityTable;
    EXPECT_FALSE(capabilityTable.supportCacheFlushAfterWalker);
}

View File

@ -30,6 +30,7 @@ set(IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands_tests.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp

View File

@ -5,56 +5,52 @@
*
*/
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "hw_cmds.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/kernel/kernel.h"
#include "unit_tests/fixtures/context_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/image_fixture.h"
#include "runtime/memory_manager/svm_memory_manager.h"
#include "unit_tests/fixtures/execution_model_kernel_fixture.h"
#include "unit_tests/fixtures/image_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/helpers/kernel_commands_tests.h"
#include "unit_tests/indirect_heap/indirect_heap_fixture.h"
#include "unit_tests/fixtures/built_in_fixture.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/mocks/mock_context.h"
#include "test.h"
#include <memory>
#include "unit_tests/mocks/mock_graphics_allocation.h"
using namespace OCLRT;
struct KernelCommandsTest : DeviceFixture,
ContextFixture,
BuiltInFixture,
::testing::Test {
void KernelCommandsTest::SetUp() {
DeviceFixture::SetUp();
ASSERT_NE(nullptr, pDevice);
cl_device_id device = pDevice;
ContextFixture::SetUp(1, &device);
ASSERT_NE(nullptr, pContext);
BuiltInFixture::SetUp(pDevice);
ASSERT_NE(nullptr, pBuiltIns);
using BuiltInFixture::SetUp;
using ContextFixture::SetUp;
mockKernelWithInternal = std::make_unique<MockKernelWithInternals>(*pDevice, pContext);
}
void SetUp() override {
DeviceFixture::SetUp();
ASSERT_NE(nullptr, pDevice);
cl_device_id device = pDevice;
ContextFixture::SetUp(1, &device);
ASSERT_NE(nullptr, pContext);
BuiltInFixture::SetUp(pDevice);
ASSERT_NE(nullptr, pBuiltIns);
}
// Tears the fixture down: releases the mock kernel first, then the built-in, context
// and device fixtures in that order.
void KernelCommandsTest::TearDown() {
// Drop the mock kernel before tearing down the fixtures; it was constructed from
// *pDevice and pContext in SetUp().
mockKernelWithInternal.reset(nullptr);
BuiltInFixture::TearDown();
ContextFixture::TearDown();
DeviceFixture::TearDown();
}
void TearDown() override {
BuiltInFixture::TearDown();
ContextFixture::TearDown();
DeviceFixture::TearDown();
}
size_t sizeRequiredCS;
size_t sizeRequiredISH;
};
// Prepares the fixture's mock kernel to hold exactly one kernel argument: sizes the
// argument containers to one entry, describes a single pointer-sized patch at
// cross-thread offset 0, and sizes the per-argument cache-flush table to match.
void KernelCommandsTest::addSpaceForSingleKernelArg() {
    kernelArguments.resize(1);
    kernelArguments[0] = kernelArgInfo;

    auto &info = mockKernelWithInternal->kernelInfo;
    info.resizeKernelArgInfoAndRegisterParameter(1);
    info.kernelArgInfo.resize(1);

    // References are taken only after the corresponding resize calls.
    auto &patchInfoVector = info.kernelArgInfo[0].kernelArgPatchInfoVector;
    patchInfoVector.resize(1);
    auto &patchInfo = patchInfoVector[0];
    patchInfo.crossthreadOffset = 0;
    patchInfo.size = sizeof(uintptr_t);

    auto kernel = mockKernelWithInternal->mockKernel;
    kernel->setKernelArguments(kernelArguments);
    kernel->kernelArgRequiresCacheFlush.resize(1);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, programInterfaceDescriptorDataResourceUsage) {
CommandQueueHw<FamilyType> cmdQ(pContext, pDevice, 0);
@ -202,11 +198,10 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhenSe
auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
IndirectHeap indirectHeap(nonInternalAllocation, false);
MockKernelWithInternals mockKernelWithInternal(*pDevice);
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap,
*mockKernelWithInternal.mockKernel,
*mockKernelWithInternal->mockKernel,
false,
nullptr,
sizeCrossThreadData);
@ -219,11 +214,10 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendC
IndirectHeap indirectHeap(internalAllocation, true);
auto expectedOffset = internalAllocation->getGpuAddressToPatch();
MockKernelWithInternals mockKernelWithInternal(*pDevice);
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap,
*mockKernelWithInternal.mockKernel,
*mockKernelWithInternal->mockKernel,
false,
nullptr,
sizeCrossThreadData);
@ -358,7 +352,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, sendIndirectStateResourceUsage)
EXPECT_GE(sizeRequiredSSH, usedAfterSSH - usedBeforeSSH);
auto usedAfterCS = commandStream.getUsed();
EXPECT_GE(KernelCommandsHelper<FamilyType>::getSizeRequiredCS(), usedAfterCS - usedBeforeCS);
EXPECT_GE(KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel), usedAfterCS - usedBeforeCS);
}
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorContainsCorrectBindingTableEntryCount) {
@ -370,10 +364,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
MockKernelWithInternals mockKernel(*pDevice, pContext);
auto expectedBindingTableCount = 3u;
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
@ -386,8 +378,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE
dsh,
ioh,
ssh,
*mockKernel.mockKernel,
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
interfaceDescriptorIndex,
@ -415,11 +407,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
MockKernelWithInternals mockKernel(*pDevice, pContext);
auto expectedBindingTableCount = 3u;
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
auto isScheduler = const_cast<bool *>(&mockKernel.mockKernel->isSchedulerKernel);
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
auto isScheduler = const_cast<bool *>(&mockKernelWithInternal->mockKernel->isSchedulerKernel);
*isScheduler = true;
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
@ -433,8 +423,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn
dsh,
ioh,
ssh,
*mockKernel.mockKernel,
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
interfaceDescriptorIndex,
@ -458,10 +448,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
MockKernelWithInternals mockKernel(*pDevice, pContext);
auto expectedBindingTableCount = 100u;
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
@ -474,8 +462,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn
dsh,
ioh,
ssh,
*mockKernel.mockKernel,
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
interfaceDescriptorIndex,
@ -981,7 +969,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
CommandQueueHw<FamilyType> cmdQ(nullptr, pDevice, 0);
MockKernelWithInternals kernelInternals(*pDevice);
const size_t localWorkSizes[3]{1, 1, 1};
auto &commandStream = cmdQ.getCS(1024);
@ -1007,8 +994,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
memset(mockDsh, 6, borderColorSize);
memset(mockDsh + borderColorSize, 8, borderColorSize);
kernelInternals.kernelInfo.heapInfo.pDsh = mockDsh;
kernelInternals.kernelInfo.patchInfo.samplerStateArray = &samplerStateArray;
mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh;
mockKernelWithInternal->kernelInfo.patchInfo.samplerStateArray = &samplerStateArray;
uint64_t interfaceDescriptorTableOffset = dsh.getUsed();
dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA));
@ -1024,16 +1011,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
pSamplerState[i].setIndirectStatePointer(0);
}
MockKernel *kernel = new MockKernel(kernelInternals.mockProgram, kernelInternals.kernelInfo, *pDevice);
kernel->setCrossThreadData(kernelInternals.crossThreadData, sizeof(kernelInternals.crossThreadData));
kernel->setSshLocal(kernelInternals.sshLocal, sizeof(kernelInternals.sshLocal));
mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData));
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
uint32_t interfaceDescriptorIndex = 0;
KernelCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*kernel,
*mockKernelWithInternal->mockKernel,
8,
localWorkSizes,
interfaceDescriptorTableOffset,
@ -1081,7 +1067,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
EXPECT_EQ(borderColorOffset, pSamplerStatesCopied[i].getIndirectStatePointer());
}
delete kernel;
delete[] mockDsh;
}
@ -1207,11 +1192,10 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThe
uint32_t crossThreadData[8];
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel));
}
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) {
@ -1220,45 +1204,179 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInline
uint32_t crossThreadData[8];
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 0;
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0;
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel));
}
HWTEST_F(KernelCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) {
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
}
HWTEST_F(KernelCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) {
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
}
HWTEST_F(KernelCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) {
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1;
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
}
HWTEST_F(KernelCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) {
MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
}
// With EnableCacheFlushAfterWalker forced to 1 and a global surface attached to the
// kernel's program, getSizeRequiredCS() must account for one extra PIPE_CONTROL and
// programCacheFlushAfterWalkerCommand() must emit a PIPE_CONTROL with CS stall and
// DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> cmdQ(nullptr, pDevice, 0);
    auto &commandStream = cmdQ.getCS(1024);

    MockGraphicsAllocation globalAllocation;
    mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation);

    size_t expectedSize = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    size_t actualSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(mockKernelWithInternal->mockKernel);
    EXPECT_EQ(expectedSize, actualSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel);

    // Detach the stack-allocated mock surface before any fatal assertion runs. A failing
    // ASSERT_* returns from the test body, which would otherwise leave the fixture-owned
    // program pointing at the destroyed globalAllocation.
    mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr);

    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(commandStream);
    PIPE_CONTROL *pipeControl = hwParse.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getDcFlushEnable());
}
// With EnableCacheFlushAfterWalker forced to 1 and the kernel's SVM allocations flagged
// as requiring a cache flush, the required command-stream size includes one PIPE_CONTROL
// and the programmed flush is a PIPE_CONTROL with CS stall and DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> cmdQ(nullptr, pDevice, 0);
    auto &stream = cmdQ.getCS(1024);

    auto kernel = mockKernelWithInternal->mockKernel;
    kernel->svmAllocationsRequireCacheFlush = true;

    // Baseline commands plus the additional flush command.
    size_t sizeWithFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushCmd);
    EXPECT_TRUE(flushCmd->getCommandStreamerStallEnable());
    EXPECT_TRUE(flushCmd->getDcFlushEnable());
}
// With EnableCacheFlushAfterWalker forced to 1 and the single kernel argument registered
// in kernelArgRequiresCacheFlush, the required command-stream size includes one
// PIPE_CONTROL and the programmed flush is a PIPE_CONTROL with CS stall and DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> cmdQ(nullptr, pDevice, 0);
    auto &stream = cmdQ.getCS(1024);

    // Size the argument tables to one entry before registering the allocation in slot 0.
    addSpaceForSingleKernelArg();
    MockGraphicsAllocation cacheRequiringAllocation;
    auto kernel = mockKernelWithInternal->mockKernel;
    kernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation;

    // Baseline commands plus the additional flush command.
    size_t sizeWithFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushCmd);
    EXPECT_TRUE(flushCmd->getCommandStreamerStallEnable());
    EXPECT_TRUE(flushCmd->getDcFlushEnable());
}
// With the debug flag forced on but nothing on the kernel registered as requiring a
// flush, the required size must contain no PIPE_CONTROL and none must be programmed.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH;
    typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    addSpaceForSingleKernelArg();
    auto *kernel = mockKernelWithInternal->mockKernel;

    // Media commands only; no PIPE_CONTROL is accounted for.
    const size_t sizeWithoutFlush = sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + 2 * sizeof(MEDIA_STATE_FLUSH);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithoutFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(nullptr, flushCmd);
}
// With the debug flag set to -1 and the capability table reporting no support, a kernel
// argument registered in the cache-flush vector must still produce no PIPE_CONTROL.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheFlushCommand) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH;
    typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);
    hwInfoHelper.capabilityTable.supportCacheFlushAfterWalker = false;

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    addSpaceForSingleKernelArg();
    MockGraphicsAllocation flushedAllocation;
    auto *kernel = mockKernelWithInternal->mockKernel;
    kernel->kernelArgRequiresCacheFlush[0] = &flushedAllocation;

    // Media commands only; no PIPE_CONTROL is accounted for.
    const size_t sizeWithoutFlush = sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + 2 * sizeof(MEDIA_STATE_FLUSH);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithoutFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(nullptr, flushCmd);
}

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/built_ins/built_ins.h"
#include "runtime/kernel/kernel.h"
#include "unit_tests/fixtures/context_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/built_in_fixture.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_graphics_allocation.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "test.h"
#include <memory>
using namespace OCLRT;
// Test fixture for the KernelCommandsHelper tests. It combines the device, context and
// built-in fixtures with ::testing::Test; SetUp/TearDown are defined out of line.
struct KernelCommandsTest : DeviceFixture,
                            ContextFixture,
                            BuiltInFixture,
                            ::testing::Test {
    // Keep the base-fixture SetUp overloads visible next to the override below.
    using BuiltInFixture::SetUp;
    using ContextFixture::SetUp;

    void SetUp() override;
    void TearDown() override;

    // Helper used by tests before they touch kernel argument 0 (defined out of line).
    void addSpaceForSingleKernelArg();

    // Zero-initialized so the members never hold indeterminate values.
    size_t sizeRequiredCS = 0;
    size_t sizeRequiredISH = 0;

    std::unique_ptr<MockKernelWithInternals> mockKernelWithInternal;
    Kernel::SimpleKernelArgInfo kernelArgInfo = {};
    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
};

View File

@ -508,7 +508,7 @@ TEST_F(CloneKernelTest, cloneKernelWithArgImmediate) {
}
TEST_F(CloneKernelTest, cloneKernelWithExecInfo) {
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256);
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256, false, false);
ASSERT_NE(nullptr, ptrSVM);
GraphicsAllocation *pSvmAlloc = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM);

View File

@ -167,3 +167,42 @@ TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAre
EXPECT_EQ(0u, *pKernelArg32bit);
EXPECT_NE(expValue, *pKernelArg64bit);
}
// A buffer whose allocation is flagged writable (L3 flush not required) must be recorded
// in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgBufferTest, givenWritebleBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    cl_mem memObj = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
// A buffer whose allocation is not flagged writable but requires an L3 flush must be
// recorded in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    cl_mem memObj = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
// A buffer whose allocation is neither flagged writable nor requires an L3 flush must
// leave a null entry in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    cl_mem memObj = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}

View File

@ -412,3 +412,90 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
alignedFree(svmPtr);
}
// An SVM allocation flagged writable (L3 flush not required) must be recorded in the
// kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgSvmTest, givenWritebleSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    const auto status = pKernel->setArgSvmAlloc(0, hostMemory, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostMemory);
}
// An SVM allocation that is not flagged writable but requires an L3 flush must be
// recorded in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    const auto status = pKernel->setArgSvmAlloc(0, hostMemory, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostMemory);
}
// An SVM allocation that is neither flagged writable nor requires an L3 flush must leave
// a null entry in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    const auto status = pKernel->setArgSvmAlloc(0, hostMemory, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostMemory);
}
// Passing a writable SVM allocation through setKernelExecInfo must set the kernel's
// svmAllocationsRequireCacheFlush flag.
TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostMemory);
}
// Passing an SVM allocation that requires an L3 flush through setKernelExecInfo must set
// the kernel's svmAllocationsRequireCacheFlush flag.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostMemory);
}
// Passing an SVM allocation that is neither writable nor requires an L3 flush through
// setKernelExecInfo must leave svmAllocationsRequireCacheFlush false.
TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) {
    const size_t allocationSize = 4096;
    void *hostMemory = alignedMalloc(allocationSize, MemoryConstants::pageSize);

    MockGraphicsAllocation allocation(hostMemory, allocationSize);
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostMemory);
}

View File

@ -259,3 +259,39 @@ TEST_F(KernelImageArgTest, givenKernelWithSharedImageWhenSetArgCalledThenUsingSh
EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched);
EXPECT_TRUE(pKernel->isUsingSharedObjArgs());
}
// An image whose allocation is flagged writable (L3 flush not required) must be recorded
// in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelImageArgTest, givenWritebleImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(true);
    image.graphicsAllocation->flushL3Required = false;

    cl_mem imageObj = &image;

    // Capture the status of setArg; previously the call's result was discarded and the
    // assertion below checked a value that setArg never wrote.
    retVal = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
// An image whose allocation is not flagged writable but requires an L3 flush must be
// recorded in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false);
    image.graphicsAllocation->flushL3Required = true;

    cl_mem imageObj = &image;

    // Capture the status of setArg; previously the call's result was discarded and the
    // assertion below checked a value that setArg never wrote.
    retVal = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
// An image whose allocation is neither flagged writable nor requires an L3 flush must
// leave a null entry in the kernel's cache-flush vector when set as argument 0.
// NOTE(review): the test name ends in "ThenExpectAllocationInCacheFlushVector" although
// the assertion checks for nullptr; consider renaming to "...ThenNotExpect...".
TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false);
    image.graphicsAllocation->flushL3Required = false;

    cl_mem imageObj = &image;

    // Capture the status of setArg; previously the call's result was discarded and the
    // assertion below checked a value that setArg never wrote.
    retVal = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}

View File

@ -2355,3 +2355,101 @@ TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkK
kernel.mockKernel->initialize();
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
}
// Adding a null allocation must overwrite the existing (non-null) slot with nullptr.
TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) {
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &cacheFlushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    cacheFlushVector.resize(1);
    // Pre-fill the slot with a non-null sentinel so the overwrite is observable.
    cacheFlushVector[0] = reinterpret_cast<GraphicsAllocation *>(0x1);

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, nullptr);
    EXPECT_EQ(nullptr, cacheFlushVector[0]);
}
// An allocation that requires an L3 flush (not flagged writable) must be stored in the
// cache-flush vector slot.
TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &cacheFlushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    cacheFlushVector.resize(1);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(&allocation, cacheFlushVector[0]);
}
// An allocation flagged writable (L3 flush not required) must be stored in the
// cache-flush vector slot.
TEST(KernelTest, whenAllocationWriteableThenAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &cacheFlushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    cacheFlushVector.resize(1);

    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(&allocation, cacheFlushVector[0]);
}
// An allocation that is neither flagged writable nor requires an L3 flush must reset the
// existing (non-null) slot to nullptr.
TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &cacheFlushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    cacheFlushVector.resize(1);
    // Pre-fill the slot with a non-null sentinel so the reset is observable.
    cacheFlushVector[0] = reinterpret_cast<GraphicsAllocation *>(0x1);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(nullptr, cacheFlushVector[0]);
}
// Debug flag = 1 with the capability table reporting no support: the kernel must report
// support as true.
TEST(KernelTest, givenEnableCacheFlushFlagIsEnableWhenPlatformDoesNotSupportThenOverrideAndReturnSupportTrue) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    EXPECT_TRUE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
// Debug flag = 0 with the capability table reporting support: the kernel must report
// support as false.
TEST(KernelTest, givenEnableCacheFlushFlagIsDisableWhenPlatformSupportsThenOverrideAndReturnSupportFalse) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(0);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    EXPECT_FALSE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
// Debug flag = -1 with the capability table reporting no support: the kernel must report
// support as false.
TEST(KernelTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformDoesNotSupportThenReturnSupportFalse) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    EXPECT_FALSE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
// Debug flag = -1 with the capability table reporting support: the kernel must report
// support as true.
TEST(KernelTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformSupportsThenReturnSupportTrue) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    EXPECT_TRUE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}

View File

@ -260,7 +260,7 @@ TEST_F(BufferSetArgTest, clSetKernelArgBuffer) {
}
TEST_F(BufferSetArgTest, clSetKernelArgSVMPointer) {
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256);
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256, false, false);
EXPECT_NE(nullptr, ptrSVM);
GraphicsAllocation *pSvmAlloc = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM);

View File

@ -494,7 +494,7 @@ TEST_F(RenderCompressedBuffersTests, givenDebugVariableSetWhenHwFlagIsNotSetThen
TEST_F(RenderCompressedBuffersTests, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) {
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
auto svmAlloc = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), false);
auto svmAlloc = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), false, false);
buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmAlloc, retVal));
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
@ -878,7 +878,7 @@ TEST_P(ValidHostPtr, failedAllocationInjection) {
TEST_P(ValidHostPtr, SvmHostPtr) {
const DeviceInfo &devInfo = pDevice->getDeviceInfo();
if (devInfo.svmCapabilities != 0) {
auto ptr = context->getSVMAllocsManager()->createSVMAlloc(64, false);
auto ptr = context->getSVMAllocsManager()->createSVMAlloc(64, false, false);
auto bufferSvm = Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 64, ptr, retVal);
EXPECT_NE(nullptr, bufferSvm);

View File

@ -31,7 +31,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocCreateNullFreeNull) {
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
{
SVMAllocsManager svmM(&memoryManager);
char *Ptr1 = (char *)svmM.createSVMAlloc(0);
char *Ptr1 = (char *)svmM.createSVMAlloc(0, false, false);
EXPECT_EQ(Ptr1, nullptr);
svmM.freeSVMAlloc(nullptr);
}
@ -42,7 +42,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocCreateFree) {
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
{
SVMAllocsManager svmM(&memoryManager);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
EXPECT_NE(Ptr1, nullptr);
svmM.freeSVMAlloc(Ptr1);
@ -72,7 +72,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocGetBeforeAndInside) {
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
{
SVMAllocsManager svmM(&memoryManager);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
EXPECT_NE(Ptr1, nullptr);
char *Ptr2 = Ptr1 - 4;
@ -93,7 +93,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocgetAfterSVM) {
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
{
SVMAllocsManager svmM(&memoryManager);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
EXPECT_NE(Ptr1, nullptr);
char *Ptr2 = Ptr1 + 4096 + 100;
@ -129,7 +129,7 @@ TEST_F(SVMMemoryAllocatorTest, WhenCouldNotAllocateInMemoryManagerThenReturnsNul
MockMemManager memoryManager(executionEnvironment);
{
MockSVMAllocsManager svmM{&memoryManager};
void *svmPtr = svmM.createSVMAlloc(512);
void *svmPtr = svmM.createSVMAlloc(512, false, false);
EXPECT_EQ(nullptr, svmPtr);
EXPECT_EQ(0U, svmM.GetSVMAllocs().getNumAllocs());
@ -151,3 +151,28 @@ TEST_F(SVMMemoryAllocatorTest, given64kbAllowedwhenAllocatingSvmMemoryThenDontPr
myMemoryManager.allocateGraphicsMemoryForSVM(1, false);
EXPECT_FALSE(myMemoryManager.preferRenderCompressedFlag);
}
// memFlagIsReadOnly must return true for each read-only style flag checked here.
// The original asserted CL_MEM_READ_ONLY twice; the copy-pasted duplicate is removed.
// NOTE(review): a third, different flag may have been intended in place of the
// duplicate - confirm against the memFlagIsReadOnly implementation.
TEST_F(SVMMemoryAllocatorTest, whenReadOnlyFlagIsPresentThenReturnTrue) {
    EXPECT_TRUE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_READ_ONLY));
    EXPECT_TRUE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_HOST_READ_ONLY));
}
// memFlagIsReadOnly must return false for flags that permit writing.
TEST_F(SVMMemoryAllocatorTest, whenNoReadOnlyFlagIsPresentThenReturnFalse) {
    const bool readWriteIsReadOnly = SVMAllocsManager::memFlagIsReadOnly(CL_MEM_READ_WRITE);
    EXPECT_FALSE(readWriteIsReadOnly);

    const bool writeOnlyIsReadOnly = SVMAllocsManager::memFlagIsReadOnly(CL_MEM_WRITE_ONLY);
    EXPECT_FALSE(writeOnlyIsReadOnly);
}
// An SVM allocation created with the read-only argument set to true must yield a
// graphics allocation whose writable-flags marker is false.
TEST_F(SVMMemoryAllocatorTest, whenReadOnlySvmAllocationCreatedThenGraphicsAllocationHasWriteableFlagFalse) {
    ExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
    SVMAllocsManager svmM(&memoryManager);

    void *svm = svmM.createSVMAlloc(4096, false, true);
    // Fatal on failure: nothing was allocated, so there is nothing to look up or free.
    ASSERT_NE(nullptr, svm);

    GraphicsAllocation *svmAllocation = svmM.getSVMAlloc(svm);
    EXPECT_NE(nullptr, svmAllocation);
    // Guard the dereference: EXPECT_NE is non-fatal, so a null lookup result would
    // otherwise crash the test binary instead of reporting a failure.
    if (svmAllocation != nullptr) {
        EXPECT_FALSE(svmAllocation->isMemObjectsAllocationWithWritableFlags());
    }

    svmM.freeSVMAlloc(svm);
}

View File

@ -23,10 +23,14 @@ namespace OCLRT {
////////////////////////////////////////////////////////////////////////////////
class MockKernel : public Kernel {
public:
using Kernel::addAllocationToCacheFlushVector;
using Kernel::auxTranslationRequired;
using Kernel::isSchedulerKernel;
using Kernel::kernelArgRequiresCacheFlush;
using Kernel::kernelArguments;
using Kernel::numberOfBindingTableStates;
using Kernel::platformSupportCacheFlushAfterWalker;
using Kernel::svmAllocationsRequireCacheFlush;
struct BlockPatchValues {
uint64_t offset;
@ -256,6 +260,7 @@ class MockKernelWithInternals {
threadPayload.LocalIDZPresent = 1;
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.pSsh = sshLocal;
kernelInfo.heapInfo.pDsh = dshLocal;
kernelInfo.heapInfo.pKernelHeader = &kernelHeader;
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
@ -298,6 +303,7 @@ class MockKernelWithInternals {
uint32_t kernelIsa[32];
char crossThreadData[256];
char sshLocal[128];
char dshLocal[128];
};
class MockParentKernel : public Kernel {

View File

@ -63,10 +63,10 @@ struct ProfilingTests : public CommandEnqueueFixture,
std::unique_ptr<MockProgram> program;
SKernelBinaryHeaderCommon kernelHeader;
SPatchDataParameterStream dataParameterStream;
SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchExecutionEnvironment executionEnvironment = {};
SPatchThreadPayload threadPayload;
SPatchThreadPayload threadPayload = {};
KernelInfo kernelInfo;
uint32_t kernelIsa[32];
@ -78,15 +78,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
MockKernel kernel(program.get(), kernelInfo, *pDevice);
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, false, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, nullptr);
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, false, &kernel);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, nullptr);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, &kernel);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
}
@ -114,16 +116,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
MockKernel kernel(program.get(), kernelInfo, *pDevice);
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
requiredSize += 2 * sizeof(GPGPU_WALKER);
MockKernel kernel(program.get(), kernelInfo, *pDevice);
DispatchInfo dispatchInfo;
dispatchInfo.setKernel(&kernel);
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.push(dispatchInfo);
multiDispatchInfo.push(dispatchInfo);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, &kernel);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, 0, true, false, *pCmdQ, multiDispatchInfo);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
@ -525,19 +528,21 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
pCmdQ->setPerfCountersEnabled(true, 1);
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
MockKernel kernel(program.get(), kernelInfo, *pDevice);
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
//begin perf cmds
requiredSize += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
//end perf cmds
requiredSize += 2 * sizeof(PIPE_CONTROL) + 3 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, true, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, true, *pCmdQ, nullptr);
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, true, &kernel);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, true, *pCmdQ, &kernel);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, true, *pCmdQ, nullptr);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, &kernel);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, true, *pCmdQ, &kernel);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
bool retVal = false;
@ -576,9 +581,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT;
MockKernel kernel(program.get(), kernelInfo, *pDevice);
pCmdQ->setPerfCountersEnabled(true, 1);
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
requiredSize += 2 * sizeof(GPGPU_WALKER);
//begin perf cmds
@ -586,13 +593,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
//end perf cmds
requiredSize += 2 * sizeof(PIPE_CONTROL) + 3 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
MockKernel kernel(program.get(), kernelInfo, *pDevice);
DispatchInfo dispatchInfo;
dispatchInfo.setKernel(&kernel);
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.push(dispatchInfo);
multiDispatchInfo.push(dispatchInfo);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, &kernel);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, 0, true, true, *pCmdQ, multiDispatchInfo);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);

View File

@ -101,3 +101,4 @@ EnableMakeResidentOnMapGpuVa = 0
RenderCompressedImagesEnabled = -1
RenderCompressedBuffersEnabled = -1
AUBDumpForceAllToLocalMemory = 0
EnableCacheFlushAfterWalker = 0