Add cache flush command after WALKER command
Change-Id: I3983dc6c0797047e17cc8189655a22a22e85892b
This commit is contained in:
parent
9e81469d9f
commit
3dca095ccf
|
@ -3405,7 +3405,8 @@ void *CL_API_CALL clSVMAlloc(cl_context context,
|
|||
return pAlloc;
|
||||
}
|
||||
|
||||
pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(size, !!(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER));
|
||||
pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(size, !!(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER),
|
||||
SVMAllocsManager::memFlagIsReadOnly(flags));
|
||||
|
||||
if (pContext->isProvidingPerformanceHints()) {
|
||||
pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, pAlloc, size);
|
||||
|
|
|
@ -418,7 +418,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool res
|
|||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
|
||||
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
|
||||
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(pKernel) +
|
||||
sizeof(PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
|
||||
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(commandQueue.getDevice());
|
||||
if (reserveProfilingCmdsSpace) {
|
||||
|
|
|
@ -191,6 +191,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);
|
||||
|
||||
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel);
|
||||
|
||||
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
|
||||
|
|
|
@ -38,33 +38,36 @@ const PLATFORM CNL::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable CNL::capabilityTable{0,
|
||||
83.333,
|
||||
21,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
true, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, true},
|
||||
&isSimulationCNL,
|
||||
true,
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
|
||||
true, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
true,
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Cnl,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable CNL::capabilityTable{
|
||||
{0, 0, 0, false, false, false}, // kmdNotifyProperties
|
||||
{true, true}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
83.333, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationCNL, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
21, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Cnl, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
true, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
true, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
true, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo CNL_2x5x8::hwInfo = {
|
||||
&CNL::platform,
|
||||
|
|
|
@ -43,7 +43,7 @@ size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
|||
typedef typename BDWFamily::MI_MATH MI_MATH;
|
||||
typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||
size_t n = 0;
|
||||
if ((pKernel != nullptr) && pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
n += sizeof(PIPE_CONTROL) +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
|
|
|
@ -39,33 +39,36 @@ const PLATFORM BDW::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable BDW::capabilityTable{0,
|
||||
80,
|
||||
21,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
false, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::Disabled,
|
||||
{false, false},
|
||||
&isSimulationBDW,
|
||||
true,
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
{true, 50000, true, 5000, true, 200000}, // KmdNotifyProperties
|
||||
false, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
true, // isCore
|
||||
false, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Bdw,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable BDW::capabilityTable{
|
||||
{50000, 5000, 200000, true, true, true}, // kmdNotifyProperties
|
||||
{false, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
80, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationBDW, // isSimulation
|
||||
PreemptionMode::Disabled, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
21, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Bdw, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
true, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
false, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
false, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
true, // isCore
|
||||
false, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo BDW_1x2x6::hwInfo = {
|
||||
&BDW::platform,
|
||||
|
|
|
@ -43,7 +43,7 @@ size_t GpgpuWalkerHelper<SKLFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
|||
typedef typename SKLFamily::MI_MATH MI_MATH;
|
||||
typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||
size_t n = 0;
|
||||
if ((pKernel != nullptr) && pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
n += sizeof(PIPE_CONTROL) +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
|
|
|
@ -36,33 +36,36 @@ const PLATFORM BXT::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable BXT::capabilityTable{0,
|
||||
52.083,
|
||||
12,
|
||||
true,
|
||||
true,
|
||||
false, // ftrSvm
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, false},
|
||||
&isSimulationBXT,
|
||||
true,
|
||||
false, // forceStatelessCompilationFor32Bit
|
||||
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
|
||||
false, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
false, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Bxt,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable BXT::capabilityTable{
|
||||
{0, 0, 0, false, false, false}, // kmdNotifyProperties
|
||||
{true, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
52.083, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationBXT, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
12, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Bxt, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
false, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
false, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
false, // forceStatelessCompilationFor32Bit
|
||||
false, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo BXT_1x2x6::hwInfo = {
|
||||
&BXT::platform,
|
||||
|
|
|
@ -31,33 +31,36 @@ const PLATFORM CFL::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable CFL::capabilityTable{0,
|
||||
83.333,
|
||||
21,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, false},
|
||||
&isSimulationCFL,
|
||||
true,
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
|
||||
true, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Cfl,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable CFL::capabilityTable{
|
||||
{0, 0, 0, false, false, false}, // kmdNotifyProperties
|
||||
{true, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
83.333, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationCFL, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
21, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Cfl, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
true, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
true, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo CFL_1x2x6::hwInfo = {
|
||||
&CFL::platform,
|
||||
|
|
|
@ -31,33 +31,36 @@ const PLATFORM GLK::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable GLK::capabilityTable{0,
|
||||
52.083,
|
||||
12,
|
||||
true,
|
||||
true,
|
||||
false, // ftrSvm
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, false},
|
||||
&isSimulationGLK,
|
||||
true,
|
||||
false, // forceStatelessCompilationFor32Bit
|
||||
{true, 30000, false, 0, false, 0}, // KmdNotifyProperties
|
||||
false, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
false, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Glk,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable GLK::capabilityTable{
|
||||
{30000, 0, 0, true, false, false}, // kmdNotifyProperties
|
||||
{true, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
52.083, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationGLK, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
12, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Glk, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
false, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
false, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
false, // forceStatelessCompilationFor32Bit
|
||||
false, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo GLK_1x3x6::hwInfo = {
|
||||
&GLK::platform,
|
||||
|
|
|
@ -31,33 +31,36 @@ const PLATFORM KBL::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable KBL::capabilityTable{0,
|
||||
83.333,
|
||||
21,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, false},
|
||||
&isSimulationKBL,
|
||||
true,
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
|
||||
true, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Kbl,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable KBL::capabilityTable{
|
||||
{0, 0, 0, false, false, false}, // kmdNotifyProperties
|
||||
{true, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
83.333, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationKBL, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
21, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Kbl, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
true, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
true, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo KBL_1x2x6::hwInfo = {
|
||||
&KBL::platform,
|
||||
|
|
|
@ -39,33 +39,36 @@ const PLATFORM SKL::platform = {
|
|||
0, // usRevId_PCH
|
||||
GTTYPE_UNDEFINED};
|
||||
|
||||
const RuntimeCapabilityTable SKL::capabilityTable{0,
|
||||
83.333,
|
||||
21,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
PreemptionMode::MidThread,
|
||||
{true, false},
|
||||
&isSimulationSKL,
|
||||
true,
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
|
||||
true, // ftr64KBpages
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
CmdServicesMemTraceVersion::DeviceValues::Skl,
|
||||
0, // extraQuantityThreadsPerEU
|
||||
true, // SupportsVme
|
||||
64, // slmSize
|
||||
MemoryConstants::max48BitAddress}; // gpuAddressSpace
|
||||
const RuntimeCapabilityTable SKL::capabilityTable{
|
||||
{0, 0, 0, false, false, false}, // kmdNotifyProperties
|
||||
{true, false}, // whitelistedRegisters
|
||||
MemoryConstants::max48BitAddress, // gpuAddressSpace
|
||||
83.333, // defaultProfilingTimerResolution
|
||||
MemoryConstants::pageSize, // requiredPreemptionSurfaceSize
|
||||
&isSimulationSKL, // isSimulation
|
||||
PreemptionMode::MidThread, // defaultPreemptionMode
|
||||
EngineType::ENGINE_RCS, // defaultEngineType
|
||||
0, // maxRenderFrequency
|
||||
21, // clVersionSupport
|
||||
CmdServicesMemTraceVersion::DeviceValues::Skl, // aubDeviceId
|
||||
0, // extraQuantityThreadsPerEU
|
||||
64, // slmSize
|
||||
true, // ftrSupportsFP64
|
||||
true, // ftrSupports64BitMath
|
||||
true, // ftrSvm
|
||||
true, // ftrSupportsCoherency
|
||||
true, // ftrSupportsVmeAvcTextureSampler
|
||||
false, // ftrSupportsVmeAvcPreemption
|
||||
false, // ftrRenderCompressedBuffers
|
||||
false, // ftrRenderCompressedImages
|
||||
true, // ftr64KBpages
|
||||
true, // instrumentationEnabled
|
||||
true, // forceStatelessCompilationFor32Bit
|
||||
true, // isCore
|
||||
true, // sourceLevelDebuggerSupported
|
||||
true, // supportsVme
|
||||
false // supportCacheFlushAfterWalker
|
||||
};
|
||||
|
||||
const HardwareInfo SKL_1x2x6::hwInfo = {
|
||||
&SKL::platform,
|
||||
|
|
|
@ -32,10 +32,19 @@ struct WhitelistedRegisters {
|
|||
};
|
||||
|
||||
struct RuntimeCapabilityTable {
|
||||
uint32_t maxRenderFrequency;
|
||||
KmdNotifyProperties kmdNotifyProperties;
|
||||
WhitelistedRegisters whitelistedRegisters;
|
||||
uint64_t gpuAddressSpace;
|
||||
double defaultProfilingTimerResolution;
|
||||
|
||||
size_t requiredPreemptionSurfaceSize;
|
||||
bool (*isSimulation)(unsigned short);
|
||||
PreemptionMode defaultPreemptionMode;
|
||||
EngineType defaultEngineType;
|
||||
uint32_t maxRenderFrequency;
|
||||
unsigned int clVersionSupport;
|
||||
uint32_t aubDeviceId;
|
||||
uint32_t extraQuantityThreadsPerEU;
|
||||
uint32_t slmSize;
|
||||
bool ftrSupportsFP64;
|
||||
bool ftrSupports64BitMath;
|
||||
bool ftrSvm;
|
||||
|
@ -44,29 +53,13 @@ struct RuntimeCapabilityTable {
|
|||
bool ftrSupportsVmeAvcPreemption;
|
||||
bool ftrRenderCompressedBuffers;
|
||||
bool ftrRenderCompressedImages;
|
||||
PreemptionMode defaultPreemptionMode;
|
||||
WhitelistedRegisters whitelistedRegisters;
|
||||
|
||||
bool (*isSimulation)(unsigned short);
|
||||
bool instrumentationEnabled;
|
||||
|
||||
bool forceStatelessCompilationFor32Bit;
|
||||
|
||||
KmdNotifyProperties kmdNotifyProperties;
|
||||
|
||||
bool ftr64KBpages;
|
||||
|
||||
EngineType defaultEngineType;
|
||||
|
||||
size_t requiredPreemptionSurfaceSize;
|
||||
bool instrumentationEnabled;
|
||||
bool forceStatelessCompilationFor32Bit;
|
||||
bool isCore;
|
||||
bool sourceLevelDebuggerSupported;
|
||||
uint32_t aubDeviceId;
|
||||
|
||||
uint32_t extraQuantityThreadsPerEU;
|
||||
bool supportsVme;
|
||||
uint32_t slmSize;
|
||||
uint64_t gpuAddressSpace;
|
||||
bool supportCacheFlushAfterWalker;
|
||||
};
|
||||
|
||||
struct HardwareCapabilities {
|
||||
|
|
|
@ -142,7 +142,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
|||
const bool &kernelUsesLocalIds,
|
||||
Kernel &kernel);
|
||||
|
||||
static size_t getSizeRequiredCS();
|
||||
static size_t getSizeRequiredCS(const Kernel *kernel);
|
||||
static bool isPipeControlWArequired();
|
||||
static size_t getSizeRequiredDSH(
|
||||
const Kernel &kernel);
|
||||
|
@ -202,6 +202,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
|||
|
||||
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
|
||||
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
|
||||
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel);
|
||||
|
||||
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
|
||||
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
|
@ -43,9 +44,13 @@ uint32_t KernelCommandsHelper<GfxFamily>::additionalSizeRequiredDsh() {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() {
|
||||
return 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
|
||||
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD);
|
||||
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(const Kernel *kernel) {
|
||||
size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
|
||||
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD);
|
||||
if (kernel->requiresCacheFlushCommand()) {
|
||||
size += sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -155,4 +160,14 @@ bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32
|
|||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel) {
|
||||
if (kernel->requiresCacheFlushCommand()) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pipeControl = GfxFamily::cmdInitPipeControl;
|
||||
pipeControl->setCommandStreamerStallEnable(true);
|
||||
pipeControl->setDcFlushEnable(true);
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -14,15 +14,15 @@
|
|||
|
||||
namespace OCLRT {
|
||||
struct KmdNotifyProperties {
|
||||
int64_t delayKmdNotifyMicroseconds;
|
||||
int64_t delayQuickKmdSleepMicroseconds;
|
||||
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
|
||||
// Main switch for KMD Notify optimization - if its disabled, all below are disabled too
|
||||
bool enableKmdNotify;
|
||||
int64_t delayKmdNotifyMicroseconds;
|
||||
// Use smaller delay in specific situations (ie. from AsyncEventsHandler)
|
||||
bool enableQuickKmdSleep;
|
||||
int64_t delayQuickKmdSleepMicroseconds;
|
||||
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
|
||||
bool enableQuickKmdSleepForSporadicWaits;
|
||||
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
|
||||
};
|
||||
|
||||
namespace KmdNotifyConstants {
|
||||
|
|
|
@ -309,6 +309,7 @@ cl_int Kernel::initialize() {
|
|||
kernelArguments.resize(numArgs);
|
||||
slmSizes.resize(numArgs);
|
||||
kernelArgHandlers.resize(numArgs);
|
||||
kernelArgRequiresCacheFlush.resize(numArgs);
|
||||
|
||||
for (uint32_t i = 0; i < numArgs; ++i) {
|
||||
storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0);
|
||||
|
@ -849,6 +850,8 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
|
|||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
}
|
||||
addAllocationToCacheFlushVector(argIndex, svmAlloc);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -884,6 +887,9 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
|||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
}
|
||||
|
||||
addAllocationToCacheFlushVector(argIndex, svmAlloc);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -908,10 +914,14 @@ const Kernel::SimpleKernelArgInfo &Kernel::getKernelArgInfo(uint32_t argIndex) c
|
|||
|
||||
// Records an SVM allocation passed through kernel exec info and remembers
// whether any such allocation requires a cache flush.
// The flag is sticky: once set it stays set until clearKernelExecInfo().
void Kernel::setKernelExecInfo(GraphicsAllocation *argValue) {
    kernelSvmGfxAllocations.push_back(argValue);

    // Evaluate the allocation unconditionally, then fold it into the sticky flag.
    const bool allocationNeedsFlush = allocationForCacheFlush(argValue);
    svmAllocationsRequireCacheFlush = allocationNeedsFlush || svmAllocationsRequireCacheFlush;
}
|
||||
|
||||
// Drops every SVM allocation registered via setKernelExecInfo() and resets
// the cache-flush requirement that those allocations may have raised.
void Kernel::clearKernelExecInfo() {
    svmAllocationsRequireCacheFlush = false;
    kernelSvmGfxAllocations.clear();
}
|
||||
|
||||
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
|
@ -1119,7 +1129,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode);
|
||||
}
|
||||
|
||||
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
||||
return CL_SUCCESS;
|
||||
} else {
|
||||
|
||||
|
@ -1243,7 +1253,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
|
|||
patch<uint32_t, cl_channel_order>(imageFormat.image_channel_order, crossThreadData, kernelArgInfo.offsetChannelOrder);
|
||||
patch<uint32_t, uint32_t>(kernelArgInfo.offsetHeap, crossThreadData, kernelArgInfo.offsetObjectId);
|
||||
patch<uint32_t, cl_uint>(imageDesc.num_mip_levels, crossThreadData, kernelArgInfo.offsetNumMipLevels);
|
||||
|
||||
addAllocationToCacheFlushVector(argIndex, pImage->getGraphicsAllocation());
|
||||
retVal = CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -2122,4 +2132,51 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Kernel::platformSupportCacheFlushAfterWalker() const {
|
||||
int32_t dbgFlag = DebugManager.flags.EnableCacheFlushAfterWalker.get();
|
||||
if (dbgFlag == 1) {
|
||||
return true;
|
||||
} else if (dbgFlag == 0) {
|
||||
return false;
|
||||
}
|
||||
return device.getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker;
|
||||
}
|
||||
|
||||
bool Kernel::requiresCacheFlushCommand() const {
|
||||
if (platformSupportCacheFlushAfterWalker()) {
|
||||
if (getProgram()->getGlobalSurface() != nullptr) {
|
||||
return true;
|
||||
}
|
||||
if (svmAllocationsRequireCacheFlush) {
|
||||
return true;
|
||||
}
|
||||
size_t args = kernelArgRequiresCacheFlush.size();
|
||||
for (size_t i = 0; i < args; i++) {
|
||||
if (kernelArgRequiresCacheFlush[i] != nullptr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Decides whether a single allocation calls for a cache flush: either its
// flushL3Required field is set or it reports writable mem-object flags.
// argAllocation is dereferenced unconditionally.
bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) {
    return argAllocation->flushL3Required || argAllocation->isMemObjectsAllocationWithWritableFlags();
}
|
||||
|
||||
void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation) {
|
||||
if (argAllocation == nullptr) {
|
||||
kernelArgRequiresCacheFlush[argIndex] = nullptr;
|
||||
} else {
|
||||
if (allocationForCacheFlush(argAllocation)) {
|
||||
kernelArgRequiresCacheFlush[argIndex] = argAllocation;
|
||||
} else {
|
||||
kernelArgRequiresCacheFlush[argIndex] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -374,6 +374,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
|
||||
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &buffersForAuxTranslation);
|
||||
|
||||
bool requiresCacheFlushCommand() const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
@ -461,6 +463,9 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
|
||||
void reconfigureKernel();
|
||||
|
||||
bool platformSupportCacheFlushAfterWalker() const;
|
||||
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
|
||||
bool allocationForCacheFlush(GraphicsAllocation *argAllocation);
|
||||
Program *program;
|
||||
Context *context;
|
||||
const Device &device;
|
||||
|
@ -493,5 +498,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
std::unique_ptr<ImageTransformer> imageTransformer;
|
||||
|
||||
bool specialPipelineSelectMode = false;
|
||||
bool svmAllocationsRequireCacheFlush = false;
|
||||
std::vector<GraphicsAllocation *> kernelArgRequiresCacheFlush;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -1,23 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
@ -64,7 +49,7 @@ GraphicsAllocation *SVMAllocsManager::MapBasedAllocationTracker::get(const void
|
|||
// Constructs the manager and stores the given MemoryManager pointer in the
// `memoryManager` member; the constructor body itself does nothing else.
SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager) : memoryManager(memoryManager) {
|
||||
}
|
||||
|
||||
void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent) {
|
||||
void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent, bool readOnly) {
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
|
@ -73,6 +58,7 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, bool coherent) {
|
|||
if (!GA) {
|
||||
return nullptr;
|
||||
}
|
||||
GA->setMemObjectsAllocationWithWritableFlags(!readOnly);
|
||||
this->SVMAllocs.insert(*GA);
|
||||
|
||||
return GA->getUnderlyingBuffer();
|
||||
|
@ -91,4 +77,8 @@ void SVMAllocsManager::freeSVMAlloc(void *ptr) {
|
|||
memoryManager->freeGraphicsMemory(GA);
|
||||
}
|
||||
}
|
||||
|
||||
// Classifies SVM allocation flags as "read only".
// Returns true when at least one of CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY or
// CL_MEM_HOST_NO_ACCESS is set in `flags`, false otherwise.
// clSVMAlloc forwards this result as the `readOnly` argument of createSVMAlloc,
// which stores its negation on the allocation through
// setMemObjectsAllocationWithWritableFlags().
// NOTE(review): CL_MEM_HOST_READ_ONLY and CL_MEM_HOST_NO_ACCESS are host-access
// qualifiers; confirm that treating them as read-only is intended here.
bool SVMAllocsManager::memFlagIsReadOnly(cl_svm_mem_flags flags) {
|
||||
return (flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) != 0;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -1,29 +1,15 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include "CL/cl.h"
|
||||
|
||||
namespace OCLRT {
|
||||
class Device;
|
||||
|
@ -45,10 +31,11 @@ class SVMAllocsManager {
|
|||
};
|
||||
|
||||
SVMAllocsManager(MemoryManager *memoryManager);
|
||||
void *createSVMAlloc(size_t size, bool coherent = false);
|
||||
void *createSVMAlloc(size_t size, bool coherent, bool readOnly);
|
||||
GraphicsAllocation *getSVMAlloc(const void *ptr);
|
||||
void freeSVMAlloc(void *ptr);
|
||||
size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); }
|
||||
static bool memFlagIsReadOnly(cl_svm_mem_flags flags);
|
||||
|
||||
protected:
|
||||
MapBasedAllocationTracker SVMAllocs;
|
||||
|
|
|
@ -91,6 +91,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algor
|
|||
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")
|
||||
|
|
|
@ -173,6 +173,34 @@ TEST_F(clSetKernelExecInfoTests, success_SvmPtrListWithMultiplePointers) {
|
|||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(3u, pMockKernel->getKernelSvmGfxAllocations().size());
|
||||
EXPECT_TRUE(pMockKernel->svmAllocationsRequireCacheFlush);
|
||||
|
||||
clSVMFree(pContext, ptrSvm1);
|
||||
clSVMFree(pContext, ptrSvm2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(clSetKernelExecInfoTests, givenReadOnlySvmPtrListWhenUsedAsKernelPointersThenNoCacheFlushRequire) {
|
||||
if (svmCapabilities != 0) {
|
||||
void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4);
|
||||
EXPECT_NE(nullptr, ptrSvm1);
|
||||
|
||||
void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4);
|
||||
EXPECT_NE(nullptr, ptrSvm2);
|
||||
|
||||
void *pSvmPtrList[] = {ptrSvm1, ptrSvm2};
|
||||
size_t SvmPtrListSizeInBytes = 2 * sizeof(void *);
|
||||
|
||||
retVal = clSetKernelExecInfo(
|
||||
pMockKernel, // cl_kernel kernel
|
||||
CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name
|
||||
SvmPtrListSizeInBytes, // size_t param_value_size
|
||||
pSvmPtrList // const void *param_value
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(2u, pMockKernel->getKernelSvmGfxAllocations().size());
|
||||
EXPECT_FALSE(pMockKernel->svmAllocationsRequireCacheFlush);
|
||||
|
||||
clSVMFree(pContext, ptrSvm1);
|
||||
clSVMFree(pContext, ptrSvm2);
|
||||
|
|
|
@ -78,11 +78,11 @@ struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, pu
|
|||
|
||||
std::unique_ptr<MockProgram> program;
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SPatchExecutionEnvironment executionEnvironment;
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchSamplerStateArray samplerArray;
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
SPatchSamplerStateArray samplerArray = {};
|
||||
|
||||
KernelInfo kernelInfo;
|
||||
KernelInfo kernelInfoWithSampler;
|
||||
|
@ -111,7 +111,7 @@ HWTEST_F(DispatchWalkerTest, shouldntChangeCommandStreamMemory) {
|
|||
|
||||
// Consume all memory except what is needed for this enqueue
|
||||
auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
|
||||
KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
|
||||
//cs has a minimum required size
|
||||
auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;
|
||||
|
@ -160,7 +160,7 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) {
|
|||
|
||||
// Consume all memory except what is needed for this enqueue
|
||||
auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
|
||||
KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
|
||||
//cs has a minimum required size
|
||||
auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;
|
||||
|
|
|
@ -10,7 +10,9 @@
|
|||
#include "runtime/memory_manager/allocations_list.h"
|
||||
#include "unit_tests/command_queue/enqueue_fixture.h"
|
||||
#include "unit_tests/fixtures/hello_world_fixture.h"
|
||||
#include "unit_tests/gen_common/gen_cmd_parse.h"
|
||||
#include "unit_tests/gen_common/gen_commands_common_validation.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
#include "unit_tests/mocks/mock_csr.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
|
@ -843,3 +845,30 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTrans
|
|||
EXPECT_EQ(1u, cmdQ.waitCalled);
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that, with the EnableCacheFlushAfterWalker debug flag set to 1 and a
// kernel marked as requiring a cache flush for its SVM allocations, the command
// that directly follows GPGPU_WALKER in the parsed command stream is a
// PIPE_CONTROL with both CS stall and DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
// Restores the debug flags when the test leaves scope.
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||
CommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);
|
||||
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
|
||||
mockKernel.mockKernel->svmAllocationsRequireCacheFlush = true;
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cmdQ.getCS(0), 0);
|
||||
auto itorCmd = find<GPGPU_WALKER *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
||||
ASSERT_NE(hwParse.cmdList.end(), itorCmd);
|
||||
++itorCmd;
|
||||
// The walker may be the last parsed command; never dereference end().
ASSERT_NE(hwParse.cmdList.end(), itorCmd);
|
||||
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*itorCmd);
|
||||
ASSERT_NE(nullptr, pipeControl);
|
||||
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
|
||||
EXPECT_TRUE(pipeControl->getDcFlushEnable());
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ struct EnqueueSvmMemCopyTest : public DeviceFixture,
|
|||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
CommandQueueFixture::SetUp(pDevice, 0);
|
||||
srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
ASSERT_NE(nullptr, srcSvmPtr);
|
||||
dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
ASSERT_NE(nullptr, dstSvmPtr);
|
||||
srcSvmAlloc = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr);
|
||||
ASSERT_NE(nullptr, srcSvmAlloc);
|
||||
|
|
|
@ -27,7 +27,7 @@ struct EnqueueSvmMemFillTest : public DeviceFixture,
|
|||
CommandQueueFixture::SetUp(pDevice, 0);
|
||||
patternSize = (size_t)GetParam();
|
||||
ASSERT_TRUE((0 < patternSize) && (patternSize <= 128));
|
||||
svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, true);
|
||||
svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
|
||||
ASSERT_NE(nullptr, svmPtr);
|
||||
svmAlloc = context->getSVMAllocsManager()->getSVMAlloc(svmPtr);
|
||||
ASSERT_NE(nullptr, svmAlloc);
|
||||
|
|
|
@ -33,7 +33,7 @@ struct EnqueueSvmTest : public DeviceFixture,
|
|||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
CommandQueueFixture::SetUp(pDevice, 0);
|
||||
ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -238,7 +238,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueDstPtrIsNull) {
|
|||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.EnableAsyncEventsHandler.set(false);
|
||||
void *pDstSVM = nullptr;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
pDstSVM, // void *dst_ptr
|
||||
|
@ -269,7 +269,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueSrcPtrIsNull) {
|
|||
|
||||
TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
|
||||
void *pDstSVM = ptrSVM;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
pDstSVM, // void *dst_ptr
|
||||
|
@ -285,7 +285,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
|
|||
|
||||
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlocking_Success) {
|
||||
void *pDstSVM = ptrSVM;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
true, // cl_bool blocking_copy
|
||||
pDstSVM, // void *dst_ptr
|
||||
|
@ -301,7 +301,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlocking_Success) {
|
|||
|
||||
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlockedOnEvent_Success) {
|
||||
void *pDstSVM = ptrSVM;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
UserEvent uEvent;
|
||||
cl_event eventWaitList[] = {&uEvent};
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
|
@ -319,7 +319,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyBlockedOnEvent_Success) {
|
|||
|
||||
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherent_Success) {
|
||||
void *pDstSVM = ptrSVM;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
false, // cl_bool blocking_copy
|
||||
pDstSVM, // void *dst_ptr
|
||||
|
@ -335,7 +335,7 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherent_Success) {
|
|||
|
||||
TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherentBlockedOnEvent_Success) {
|
||||
void *pDstSVM = ptrSVM;
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true);
|
||||
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, true, false);
|
||||
UserEvent uEvent;
|
||||
cl_event eventWaitList[] = {&uEvent};
|
||||
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||
|
@ -522,7 +522,7 @@ TEST_F(EnqueueSvmTest, concurentMapAccess) {
|
|||
|
||||
auto allocSvm = [&](uint32_t from, uint32_t to) {
|
||||
for (uint32_t i = from; i <= to; i++) {
|
||||
svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1);
|
||||
svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1, false, false);
|
||||
auto ga = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]);
|
||||
EXPECT_NE(nullptr, ga);
|
||||
EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]);
|
||||
|
|
|
@ -757,8 +757,8 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueFillImageWhenZeroSizeEnqueueIsDetect
|
|||
HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetectedThenCommandMarkerShouldBeEnqueued) {
|
||||
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
|
||||
|
||||
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
size_t zeroSize = 0;
|
||||
mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
|
||||
|
@ -771,8 +771,8 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetect
|
|||
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
|
||||
|
||||
cl_event event;
|
||||
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
size_t zeroSize = 0;
|
||||
mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, &event);
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
|
||||
|
@ -793,7 +793,7 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemcpyWhenZeroSizeEnqueueIsDetect
|
|||
HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemFillWhenZeroSizeEnqueueIsDetectedThenCommandMarkerShouldBeEnqueued) {
|
||||
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
|
||||
|
||||
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
const float pattern[1] = {1.2345f};
|
||||
size_t zeroSize = 0;
|
||||
mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, nullptr);
|
||||
|
@ -806,7 +806,7 @@ HWTEST_F(ZeroSizeEnqueueHandlerTest, enqueueSVMMemFillWhenZeroSizeEnqueueIsDetec
|
|||
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pDevice, 0));
|
||||
|
||||
cl_event event;
|
||||
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
const float pattern[1] = {1.2345f};
|
||||
size_t zeroSize = 0;
|
||||
mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, &event);
|
||||
|
|
|
@ -648,7 +648,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCalling
|
|||
|
||||
TEST_F(PerformanceHintEnqueueTest, GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) {
|
||||
|
||||
void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
|
||||
pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmPtr, 256, 0, nullptr, nullptr);
|
||||
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
|
||||
namespace OCLRT {
|
||||
void DeviceFixture::SetUp() {
|
||||
SetUpImpl(nullptr);
|
||||
hwInfoHelper = *platformDevices[0];
|
||||
SetUpImpl(&hwInfoHelper);
|
||||
}
|
||||
|
||||
void DeviceFixture::SetUpImpl(const OCLRT::HardwareInfo *hardwareInfo) {
|
||||
|
|
|
@ -46,3 +46,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) {
|
|||
// Checks that capabilityTable.slmSize in the hardware info of the device under
// test equals 64.
GEN8TEST_F(Gen8DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) {
|
||||
EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize);
|
||||
}
|
||||
|
||||
// Checks that capabilityTable.supportCacheFlushAfterWalker is false in the
// hardware info of the Gen8 device under test.
GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckSupportCacheFlushAfterWalkerThenFalse) {
|
||||
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker);
|
||||
}
|
||||
|
|
|
@ -59,3 +59,7 @@ GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThe
|
|||
// Checks that capabilityTable.slmSize in the hardware info of the device under
// test equals 64.
GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) {
|
||||
EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize);
|
||||
}
|
||||
|
||||
// Checks that capabilityTable.supportCacheFlushAfterWalker is false in the
// hardware info of the Gen9 device under test.
GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckSupportCacheFlushAfterWalkerThenFalse) {
|
||||
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker);
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ set(IGDRCL_SRCS_tests_helpers
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands_tests.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp
|
||||
|
|
|
@ -5,56 +5,52 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "unit_tests/fixtures/context_fixture.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/fixtures/image_fixture.h"
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
#include "unit_tests/fixtures/execution_model_kernel_fixture.h"
|
||||
#include "unit_tests/fixtures/image_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
#include "unit_tests/helpers/kernel_commands_tests.h"
|
||||
#include "unit_tests/indirect_heap/indirect_heap_fixture.h"
|
||||
#include "unit_tests/fixtures/built_in_fixture.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
#include "unit_tests/mocks/mock_program.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
#include "test.h"
|
||||
|
||||
#include <memory>
|
||||
#include "unit_tests/mocks/mock_graphics_allocation.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
struct KernelCommandsTest : DeviceFixture,
|
||||
ContextFixture,
|
||||
BuiltInFixture,
|
||||
::testing::Test {
|
||||
void KernelCommandsTest::SetUp() {
|
||||
DeviceFixture::SetUp();
|
||||
ASSERT_NE(nullptr, pDevice);
|
||||
cl_device_id device = pDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
ASSERT_NE(nullptr, pContext);
|
||||
BuiltInFixture::SetUp(pDevice);
|
||||
ASSERT_NE(nullptr, pBuiltIns);
|
||||
|
||||
using BuiltInFixture::SetUp;
|
||||
using ContextFixture::SetUp;
|
||||
mockKernelWithInternal = std::make_unique<MockKernelWithInternals>(*pDevice, pContext);
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
ASSERT_NE(nullptr, pDevice);
|
||||
cl_device_id device = pDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
ASSERT_NE(nullptr, pContext);
|
||||
BuiltInFixture::SetUp(pDevice);
|
||||
ASSERT_NE(nullptr, pBuiltIns);
|
||||
}
|
||||
void KernelCommandsTest::TearDown() {
|
||||
mockKernelWithInternal.reset(nullptr);
|
||||
BuiltInFixture::TearDown();
|
||||
ContextFixture::TearDown();
|
||||
DeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
BuiltInFixture::TearDown();
|
||||
ContextFixture::TearDown();
|
||||
DeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
size_t sizeRequiredCS;
|
||||
size_t sizeRequiredISH;
|
||||
};
|
||||
// Fixture helper that prepares the shared mock kernel to hold exactly one
// kernel argument. It sizes every per-argument container touched below to one
// element and installs the fixture's argument list on the mock kernel.
void KernelCommandsTest::addSpaceForSingleKernelArg() {
|
||||
// Fixture-side argument list: a single entry copied from kernelArgInfo.
kernelArguments.resize(1);
|
||||
kernelArguments[0] = kernelArgInfo;
|
||||
// KernelInfo side: one argument with one patch entry.
mockKernelWithInternal->kernelInfo.resizeKernelArgInfoAndRegisterParameter(1);
|
||||
mockKernelWithInternal->kernelInfo.kernelArgInfo.resize(1);
|
||||
mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1);
|
||||
// The patch entry is placed at cross-thread offset 0 and is pointer-sized.
mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0;
|
||||
mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t);
|
||||
mockKernelWithInternal->mockKernel->setKernelArguments(kernelArguments);
|
||||
// One slot in the per-argument cache-flush vector, matching the single argument.
mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, programInterfaceDescriptorDataResourceUsage) {
|
||||
CommandQueueHw<FamilyType> cmdQ(pContext, pDevice, 0);
|
||||
|
@ -202,11 +198,10 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhenSe
|
|||
auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
IndirectHeap indirectHeap(nonInternalAllocation, false);
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
|
||||
auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
|
||||
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*mockKernelWithInternal.mockKernel,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
|
@ -219,11 +214,10 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendC
|
|||
IndirectHeap indirectHeap(internalAllocation, true);
|
||||
auto expectedOffset = internalAllocation->getGpuAddressToPatch();
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
|
||||
auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
|
||||
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*mockKernelWithInternal.mockKernel,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
|
@ -358,7 +352,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, sendIndirectStateResourceUsage)
|
|||
EXPECT_GE(sizeRequiredSSH, usedAfterSSH - usedBeforeSSH);
|
||||
|
||||
auto usedAfterCS = commandStream.getUsed();
|
||||
EXPECT_GE(KernelCommandsHelper<FamilyType>::getSizeRequiredCS(), usedAfterCS - usedBeforeCS);
|
||||
EXPECT_GE(KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel), usedAfterCS - usedBeforeCS);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorContainsCorrectBindingTableEntryCount) {
|
||||
|
@ -370,10 +364,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE
|
|||
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice, pContext);
|
||||
|
||||
auto expectedBindingTableCount = 3u;
|
||||
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
|
||||
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
|
||||
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
|
||||
|
@ -386,8 +378,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE
|
|||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernel.mockKernel,
|
||||
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
interfaceDescriptorIndex,
|
||||
|
@ -415,11 +407,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn
|
|||
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice, pContext);
|
||||
|
||||
auto expectedBindingTableCount = 3u;
|
||||
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
auto isScheduler = const_cast<bool *>(&mockKernel.mockKernel->isSchedulerKernel);
|
||||
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
auto isScheduler = const_cast<bool *>(&mockKernelWithInternal->mockKernel->isSchedulerKernel);
|
||||
*isScheduler = true;
|
||||
|
||||
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
|
||||
|
@ -433,8 +423,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn
|
|||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernel.mockKernel,
|
||||
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
interfaceDescriptorIndex,
|
||||
|
@ -458,10 +448,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn
|
|||
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
|
||||
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice, pContext);
|
||||
|
||||
auto expectedBindingTableCount = 100u;
|
||||
mockKernel.mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount;
|
||||
|
||||
auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
|
||||
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
|
||||
|
@ -474,8 +462,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn
|
|||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernel.mockKernel,
|
||||
mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
interfaceDescriptorIndex,
|
||||
|
@ -981,7 +969,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
|
|||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
|
||||
CommandQueueHw<FamilyType> cmdQ(nullptr, pDevice, 0);
|
||||
MockKernelWithInternals kernelInternals(*pDevice);
|
||||
const size_t localWorkSizes[3]{1, 1, 1};
|
||||
|
||||
auto &commandStream = cmdQ.getCS(1024);
|
||||
|
@ -1007,8 +994,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
|
|||
memset(mockDsh, 6, borderColorSize);
|
||||
memset(mockDsh + borderColorSize, 8, borderColorSize);
|
||||
|
||||
kernelInternals.kernelInfo.heapInfo.pDsh = mockDsh;
|
||||
kernelInternals.kernelInfo.patchInfo.samplerStateArray = &samplerStateArray;
|
||||
mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh;
|
||||
mockKernelWithInternal->kernelInfo.patchInfo.samplerStateArray = &samplerStateArray;
|
||||
|
||||
uint64_t interfaceDescriptorTableOffset = dsh.getUsed();
|
||||
dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA));
|
||||
|
@ -1024,16 +1011,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
|
|||
pSamplerState[i].setIndirectStatePointer(0);
|
||||
}
|
||||
|
||||
MockKernel *kernel = new MockKernel(kernelInternals.mockProgram, kernelInternals.kernelInfo, *pDevice);
|
||||
kernel->setCrossThreadData(kernelInternals.crossThreadData, sizeof(kernelInternals.crossThreadData));
|
||||
kernel->setSshLocal(kernelInternals.sshLocal, sizeof(kernelInternals.sshLocal));
|
||||
mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData));
|
||||
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
KernelCommandsHelper<FamilyType>::sendIndirectState(
|
||||
commandStream,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
*kernel,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
8,
|
||||
localWorkSizes,
|
||||
interfaceDescriptorTableOffset,
|
||||
|
@ -1081,7 +1067,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
|
|||
EXPECT_EQ(borderColorOffset, pSamplerStatesCopied[i].getIndirectStatePointer());
|
||||
}
|
||||
|
||||
delete kernel;
|
||||
delete[] mockDsh;
|
||||
}
|
||||
|
||||
|
@ -1207,11 +1192,10 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThe
|
|||
|
||||
uint32_t crossThreadData[8];
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
|
||||
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
|
||||
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) {
|
||||
|
@ -1220,45 +1204,179 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInline
|
|||
|
||||
uint32_t crossThreadData[8];
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 0;
|
||||
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0;
|
||||
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) {
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) {
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) {
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1;
|
||||
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) {
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;
|
||||
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal.mockKernel));
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
|
||||
}
|
||||
|
||||
// Cache flush flag forced to 1 and a global surface attached to the program:
// the size estimate must include one PIPE_CONTROL, and the programmed stream
// must contain a PIPE_CONTROL with CS stall and DC flush enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    MockGraphicsAllocation globalSurface;
    auto *kernel = mockKernelWithInternal->mockKernel;
    mockKernelWithInternal->mockProgram->setGlobalSurface(&globalSurface);

    // Size estimate: base commands plus one PIPE_CONTROL for the flush.
    const size_t sizeWithFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushCmd);
    EXPECT_TRUE(flushCmd->getCommandStreamerStallEnable());
    EXPECT_TRUE(flushCmd->getDcFlushEnable());

    // Detach the stack-allocated surface before it goes out of scope.
    mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr);
}
|
||||
|
||||
// Cache flush flag forced to 1 and the kernel marked as having SVM allocations
// that require a flush: expect the extra PIPE_CONTROL in both the size estimate
// and the programmed command stream.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    auto *kernel = mockKernelWithInternal->mockKernel;
    kernel->svmAllocationsRequireCacheFlush = true;

    // Size estimate: base commands plus one PIPE_CONTROL for the flush.
    const size_t sizeWithFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushCmd);
    EXPECT_TRUE(flushCmd->getCommandStreamerStallEnable());
    EXPECT_TRUE(flushCmd->getDcFlushEnable());
}
|
||||
|
||||
// Cache flush flag forced to 1 and kernel argument 0 registered in the
// cache-flush vector: expect the extra PIPE_CONTROL in both the size estimate
// and the programmed command stream.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    addSpaceForSingleKernelArg();
    MockGraphicsAllocation flushRequiringAllocation;
    auto *kernel = mockKernelWithInternal->mockKernel;
    kernel->kernelArgRequiresCacheFlush[0] = &flushRequiringAllocation;

    // Size estimate: base commands plus one PIPE_CONTROL for the flush.
    const size_t sizeWithFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushCmd);
    EXPECT_TRUE(flushCmd->getCommandStreamerStallEnable());
    EXPECT_TRUE(flushCmd->getDcFlushEnable());
}
|
||||
|
||||
// Cache flush flag forced to 1 but nothing on the kernel requests a flush
// (no global surface, no SVM flag, no kernel-arg entry): the size estimate
// must not include a PIPE_CONTROL and none may be programmed.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    addSpaceForSingleKernelArg();
    auto *kernel = mockKernelWithInternal->mockKernel;

    // Size estimate: base commands only, no flush command.
    const size_t sizeWithoutFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithoutFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(nullptr, flushCmd);
}
|
||||
|
||||
// Debug flag set to -1 and the capability table reporting no support:
// even with a kernel argument registered for cache flush, no PIPE_CONTROL
// may be accounted for or programmed.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheFlushCommand) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);
    hwInfoHelper.capabilityTable.supportCacheFlushAfterWalker = false;

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &stream = queue.getCS(1024);

    addSpaceForSingleKernelArg();
    MockGraphicsAllocation flushRequiringAllocation;
    auto *kernel = mockKernelWithInternal->mockKernel;
    kernel->kernelArgRequiresCacheFlush[0] = &flushRequiringAllocation;

    // Size estimate: base commands only, no flush command.
    const size_t sizeWithoutFlush = 2 * sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
    const size_t reportedSize = KernelCommandsHelper<FamilyType>::getSizeRequiredCS(kernel);
    EXPECT_EQ(sizeWithoutFlush, reportedSize);

    KernelCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&stream, kernel);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream);
    PIPE_CONTROL *flushCmd = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(nullptr, flushCmd);
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "unit_tests/fixtures/context_fixture.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/fixtures/built_in_fixture.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
#include "unit_tests/mocks/mock_graphics_allocation.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
#include "unit_tests/mocks/mock_program.h"
|
||||
#include "test.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
struct KernelCommandsTest : DeviceFixture,
|
||||
ContextFixture,
|
||||
BuiltInFixture,
|
||||
::testing::Test {
|
||||
|
||||
using BuiltInFixture::SetUp;
|
||||
using ContextFixture::SetUp;
|
||||
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
|
||||
void addSpaceForSingleKernelArg();
|
||||
|
||||
size_t sizeRequiredCS;
|
||||
size_t sizeRequiredISH;
|
||||
|
||||
std::unique_ptr<MockKernelWithInternals> mockKernelWithInternal;
|
||||
Kernel::SimpleKernelArgInfo kernelArgInfo = {};
|
||||
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
|
||||
};
|
|
@ -508,7 +508,7 @@ TEST_F(CloneKernelTest, cloneKernelWithArgImmediate) {
|
|||
}
|
||||
|
||||
TEST_F(CloneKernelTest, cloneKernelWithExecInfo) {
|
||||
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
ASSERT_NE(nullptr, ptrSVM);
|
||||
|
||||
GraphicsAllocation *pSvmAlloc = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
|
||||
|
|
|
@ -167,3 +167,42 @@ TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAre
|
|||
EXPECT_EQ(0u, *pKernelArg32bit);
|
||||
EXPECT_NE(expValue, *pKernelArg64bit);
|
||||
}
|
||||
|
||||
// A buffer whose allocation is flagged writable (L3 flush not required) must be
// recorded in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgBufferTest, givenWritebleBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    auto clBuffer = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &clBuffer);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
||||
// A non-writable buffer whose allocation requires an L3 flush must be recorded
// in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    auto clBuffer = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &clBuffer);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
||||
// A buffer that is neither writable nor L3-flush-requiring must leave slot 0
// of the kernel's cache-flush vector empty.
TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    auto mockBuffer = std::make_unique<MockBuffer>();
    auto &allocation = mockBuffer->mockGfxAllocation;
    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    auto clBuffer = static_cast<cl_mem>(mockBuffer.get());

    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &clBuffer);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
|
|
@ -412,3 +412,90 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
|
|||
|
||||
alignedFree(svmPtr);
|
||||
}
|
||||
|
||||
// A writable SVM allocation (L3 flush not required) set as argument 0 must be
// recorded in the kernel's cache-flush vector.
TEST_F(KernelArgSvmTest, givenWritebleSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    const auto status = pKernel->setArgSvmAlloc(0, hostPtr, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostPtr);
}
|
||||
|
||||
// A non-writable SVM allocation that requires an L3 flush, set as argument 0,
// must be recorded in the kernel's cache-flush vector.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    const auto status = pKernel->setArgSvmAlloc(0, hostPtr, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(&allocation, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostPtr);
}
|
||||
|
||||
// An SVM allocation that is neither writable nor L3-flush-requiring must leave
// slot 0 of the kernel's cache-flush vector empty.
TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    const auto status = pKernel->setArgSvmAlloc(0, hostPtr, &allocation);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);

    alignedFree(hostPtr);
}
|
||||
|
||||
// Passing a writable SVM allocation through setKernelExecInfo must raise the
// kernel's svmAllocationsRequireCacheFlush flag.
TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostPtr);
}
|
||||
|
||||
// Passing a non-writable SVM allocation that requires an L3 flush through
// setKernelExecInfo must raise the kernel's svmAllocationsRequireCacheFlush flag.
TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostPtr);
}
|
||||
|
||||
// Passing an SVM allocation that is neither writable nor L3-flush-requiring
// through setKernelExecInfo must leave svmAllocationsRequireCacheFlush false.
TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) {
    const size_t allocationSize = 4096;
    void *hostPtr = alignedMalloc(allocationSize, MemoryConstants::pageSize);
    MockGraphicsAllocation allocation(hostPtr, allocationSize);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    pKernel->setKernelExecInfo(&allocation);
    EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush);

    alignedFree(hostPtr);
}
|
||||
|
|
|
@ -259,3 +259,39 @@ TEST_F(KernelImageArgTest, givenKernelWithSharedImageWhenSetArgCalledThenUsingSh
|
|||
EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched);
|
||||
EXPECT_TRUE(pKernel->isUsingSharedObjArgs());
|
||||
}
|
||||
|
||||
// An image whose allocation is flagged writable (L3 flush not required) must be
// recorded in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelImageArgTest, givenWritebleImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(true);
    image.graphicsAllocation->flushL3Required = false;

    cl_mem imageObj = &image;

    // Check the status returned by this call; the original discarded it and
    // asserted on a retVal that setArg never wrote.
    const auto setArgStatus = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, setArgStatus);
    EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
||||
// A non-writable image whose allocation requires an L3 flush must be recorded
// in the kernel's cache-flush vector when set as argument 0.
TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false);
    image.graphicsAllocation->flushL3Required = true;

    cl_mem imageObj = &image;

    // Check the status returned by this call; the original discarded it and
    // asserted on a retVal that setArg never wrote.
    const auto setArgStatus = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, setArgStatus);
    EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
||||
// An image that is neither writable nor L3-flush-requiring must leave slot 0 of
// the kernel's cache-flush vector empty.
// NOTE(review): the test name says "ExpectAllocation" but the assertion expects
// nullptr; the sibling buffer/SVM tests use "NotExpectAllocation" for this case.
TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    MockImageBase image;
    image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false);
    image.graphicsAllocation->flushL3Required = false;

    cl_mem imageObj = &image;

    // Check the status returned by this call; the original discarded it and
    // asserted on a retVal that setArg never wrote.
    const auto setArgStatus = pKernel->setArg(0, sizeof(imageObj), &imageObj);
    EXPECT_EQ(CL_SUCCESS, setArgStatus);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
|
||||
|
|
|
@ -2355,3 +2355,101 @@ TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkK
|
|||
kernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
}
|
||||
|
||||
// Adding a null allocation must overwrite the existing slot with nullptr.
TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) {
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &flushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    flushVector.resize(1);
    // Pre-fill the slot with a dummy non-null value so the overwrite is observable.
    flushVector[0] = reinterpret_cast<GraphicsAllocation *>(0x1);

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, nullptr);
    EXPECT_EQ(nullptr, flushVector[0]);
}
|
||||
|
||||
// A non-writable allocation that requires an L3 flush must be stored in the slot.
TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &flushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    flushVector.resize(1);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = true;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(&allocation, flushVector[0]);
}
|
||||
|
||||
// A writable allocation (L3 flush not required) must be stored in the slot.
TEST(KernelTest, whenAllocationWriteableThenAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &flushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    flushVector.resize(1);

    allocation.setMemObjectsAllocationWithWritableFlags(true);
    allocation.flushL3Required = false;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(&allocation, flushVector[0]);
}
|
||||
|
||||
// An allocation that is neither writable nor L3-flush-requiring must reset the
// slot to nullptr, even when the slot previously held a value.
TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) {
    MockGraphicsAllocation allocation;
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*mockDevice);

    auto &flushVector = kernelWithInternals.mockKernel->kernelArgRequiresCacheFlush;
    flushVector.resize(1);
    // Pre-fill the slot with a dummy non-null value so the reset is observable.
    flushVector[0] = reinterpret_cast<GraphicsAllocation *>(0x1);

    allocation.setMemObjectsAllocationWithWritableFlags(false);
    allocation.flushL3Required = false;

    kernelWithInternals.mockKernel->addAllocationToCacheFlushVector(0, &allocation);
    EXPECT_EQ(nullptr, flushVector[0]);
}
|
||||
|
||||
// Debug flag = 1 overrides a capability table that reports no support.
TEST(KernelTest, givenEnableCacheFlushFlagIsEnableWhenPlatformDoesNotSupportThenOverrideAndReturnSupportTrue) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);
    EXPECT_TRUE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
|
||||
|
||||
// Debug flag = 0 overrides a capability table that reports support.
TEST(KernelTest, givenEnableCacheFlushFlagIsDisableWhenPlatformSupportsThenOverrideAndReturnSupportFalse) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(0);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);
    EXPECT_FALSE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
|
||||
|
||||
// Debug flag = -1 defers to the capability table, which reports no support here.
TEST(KernelTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformDoesNotSupportThenReturnSupportFalse) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);
    EXPECT_FALSE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
|
||||
|
||||
// Debug flag = -1 defers to the capability table, which reports support here.
TEST(KernelTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformSupportsThenReturnSupportTrue) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *platformDevices[0];
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockKernelWithInternals kernelWithInternals(*mockDevice);
    EXPECT_TRUE(kernelWithInternals.mockKernel->platformSupportCacheFlushAfterWalker());
}
|
||||
|
|
|
@ -260,7 +260,7 @@ TEST_F(BufferSetArgTest, clSetKernelArgBuffer) {
|
|||
}
|
||||
|
||||
TEST_F(BufferSetArgTest, clSetKernelArgSVMPointer) {
|
||||
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256);
|
||||
void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256, false, false);
|
||||
EXPECT_NE(nullptr, ptrSVM);
|
||||
|
||||
GraphicsAllocation *pSvmAlloc = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
|
||||
|
|
|
@ -494,7 +494,7 @@ TEST_F(RenderCompressedBuffersTests, givenDebugVariableSetWhenHwFlagIsNotSetThen
|
|||
TEST_F(RenderCompressedBuffersTests, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) {
|
||||
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
|
||||
|
||||
auto svmAlloc = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), false);
|
||||
auto svmAlloc = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), false, false);
|
||||
|
||||
buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmAlloc, retVal));
|
||||
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
|
||||
|
@ -878,7 +878,7 @@ TEST_P(ValidHostPtr, failedAllocationInjection) {
|
|||
TEST_P(ValidHostPtr, SvmHostPtr) {
|
||||
const DeviceInfo &devInfo = pDevice->getDeviceInfo();
|
||||
if (devInfo.svmCapabilities != 0) {
|
||||
auto ptr = context->getSVMAllocsManager()->createSVMAlloc(64, false);
|
||||
auto ptr = context->getSVMAllocsManager()->createSVMAlloc(64, false, false);
|
||||
|
||||
auto bufferSvm = Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 64, ptr, retVal);
|
||||
EXPECT_NE(nullptr, bufferSvm);
|
||||
|
|
|
@ -31,7 +31,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocCreateNullFreeNull) {
|
|||
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
{
|
||||
SVMAllocsManager svmM(&memoryManager);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(0);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(0, false, false);
|
||||
EXPECT_EQ(Ptr1, nullptr);
|
||||
svmM.freeSVMAlloc(nullptr);
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocCreateFree) {
|
|||
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
{
|
||||
SVMAllocsManager svmM(&memoryManager);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
|
||||
EXPECT_NE(Ptr1, nullptr);
|
||||
|
||||
svmM.freeSVMAlloc(Ptr1);
|
||||
|
@ -72,7 +72,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocGetBeforeAndInside) {
|
|||
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
{
|
||||
SVMAllocsManager svmM(&memoryManager);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
|
||||
EXPECT_NE(Ptr1, nullptr);
|
||||
|
||||
char *Ptr2 = Ptr1 - 4;
|
||||
|
@ -93,7 +93,7 @@ TEST_F(SVMMemoryAllocatorTest, SVMAllocgetAfterSVM) {
|
|||
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
{
|
||||
SVMAllocsManager svmM(&memoryManager);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096);
|
||||
char *Ptr1 = (char *)svmM.createSVMAlloc(4096, false, false);
|
||||
EXPECT_NE(Ptr1, nullptr);
|
||||
|
||||
char *Ptr2 = Ptr1 + 4096 + 100;
|
||||
|
@ -129,7 +129,7 @@ TEST_F(SVMMemoryAllocatorTest, WhenCouldNotAllocateInMemoryManagerThenReturnsNul
|
|||
MockMemManager memoryManager(executionEnvironment);
|
||||
{
|
||||
MockSVMAllocsManager svmM{&memoryManager};
|
||||
void *svmPtr = svmM.createSVMAlloc(512);
|
||||
void *svmPtr = svmM.createSVMAlloc(512, false, false);
|
||||
EXPECT_EQ(nullptr, svmPtr);
|
||||
|
||||
EXPECT_EQ(0U, svmM.GetSVMAllocs().getNumAllocs());
|
||||
|
@ -151,3 +151,28 @@ TEST_F(SVMMemoryAllocatorTest, given64kbAllowedwhenAllocatingSvmMemoryThenDontPr
|
|||
myMemoryManager.allocateGraphicsMemoryForSVM(1, false);
|
||||
EXPECT_FALSE(myMemoryManager.preferRenderCompressedFlag);
|
||||
}
|
||||
|
||||
// Verifies that SVMAllocsManager::memFlagIsReadOnly() returns true when it is
// given CL_MEM_READ_ONLY or CL_MEM_HOST_READ_ONLY.
TEST_F(SVMMemoryAllocatorTest, whenReadOnlyFlagIsPresentThenReturnTrue) {
|
||||
EXPECT_TRUE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_READ_ONLY));
|
||||
EXPECT_TRUE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_HOST_READ_ONLY));
|
||||
// NOTE(review): this assertion repeats the CL_MEM_READ_ONLY check above and
// adds no coverage; a different flag may have been intended - confirm.
EXPECT_TRUE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_READ_ONLY));
|
||||
}
|
||||
|
||||
// Verifies that SVMAllocsManager::memFlagIsReadOnly() returns false for flags
// that allow writing: CL_MEM_READ_WRITE and CL_MEM_WRITE_ONLY.
TEST_F(SVMMemoryAllocatorTest, whenNoReadOnlyFlagIsPresentThenReturnFalse) {
|
||||
EXPECT_FALSE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_READ_WRITE));
|
||||
EXPECT_FALSE(SVMAllocsManager::memFlagIsReadOnly(CL_MEM_WRITE_ONLY));
|
||||
}
|
||||
|
||||
// Creates a 4096-byte SVM allocation with the third createSVMAlloc() argument
// (the read-only flag) set to true, then checks that the backing
// GraphicsAllocation reports isMemObjectsAllocationWithWritableFlags() == false.
TEST_F(SVMMemoryAllocatorTest, whenReadOnlySvmAllocationCreatedThenGraphicsAllocationHasWriteableFlagFalse) {
|
||||
ExecutionEnvironment executionEnvironment;
|
||||
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
|
||||
SVMAllocsManager svmM(&memoryManager);
|
||||
void *svm = svmM.createSVMAlloc(4096, false, true);
|
||||
EXPECT_NE(nullptr, svm);
|
||||

||||
GraphicsAllocation *svmAllocation = svmM.getSVMAlloc(svm);
|
||||
// Fatal assertion: svmAllocation is dereferenced on the next statement, so a
// null result must abort this test instead of crashing the test binary.
ASSERT_NE(nullptr, svmAllocation);
|
||||
EXPECT_FALSE(svmAllocation->isMemObjectsAllocationWithWritableFlags());
|
||||

||||
svmM.freeSVMAlloc(svm);
|
||||
}
|
||||
|
|
|
@ -23,10 +23,14 @@ namespace OCLRT {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
class MockKernel : public Kernel {
|
||||
public:
|
||||
using Kernel::addAllocationToCacheFlushVector;
|
||||
using Kernel::auxTranslationRequired;
|
||||
using Kernel::isSchedulerKernel;
|
||||
using Kernel::kernelArgRequiresCacheFlush;
|
||||
using Kernel::kernelArguments;
|
||||
using Kernel::numberOfBindingTableStates;
|
||||
using Kernel::platformSupportCacheFlushAfterWalker;
|
||||
using Kernel::svmAllocationsRequireCacheFlush;
|
||||
|
||||
struct BlockPatchValues {
|
||||
uint64_t offset;
|
||||
|
@ -256,6 +260,7 @@ class MockKernelWithInternals {
|
|||
threadPayload.LocalIDZPresent = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.pSsh = sshLocal;
|
||||
kernelInfo.heapInfo.pDsh = dshLocal;
|
||||
kernelInfo.heapInfo.pKernelHeader = &kernelHeader;
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
|
@ -298,6 +303,7 @@ class MockKernelWithInternals {
|
|||
uint32_t kernelIsa[32];
|
||||
char crossThreadData[256];
|
||||
char sshLocal[128];
|
||||
char dshLocal[128];
|
||||
};
|
||||
|
||||
class MockParentKernel : public Kernel {
|
||||
|
|
|
@ -63,10 +63,10 @@ struct ProfilingTests : public CommandEnqueueFixture,
|
|||
|
||||
std::unique_ptr<MockProgram> program;
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
KernelInfo kernelInfo;
|
||||
|
||||
uint32_t kernelIsa[32];
|
||||
|
@ -78,15 +78,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
|
|||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, false, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, nullptr);
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, false, &kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, nullptr);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, &kernel);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
}
|
||||
|
@ -114,16 +116,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
|
|||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
requiredSize += 2 * sizeof(GPGPU_WALKER);
|
||||
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
DispatchInfo dispatchInfo;
|
||||
dispatchInfo.setKernel(&kernel);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, &kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, 0, true, false, *pCmdQ, multiDispatchInfo);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
|
@ -525,19 +528,21 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
|
|||
|
||||
pCmdQ->setPerfCountersEnabled(true, 1);
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
//begin perf cmds
|
||||
requiredSize += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
|
||||
//end perf cmds
|
||||
requiredSize += 2 * sizeof(PIPE_CONTROL) + 3 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
|
||||
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, true, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, true, *pCmdQ, nullptr);
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, true, true, &kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, true, *pCmdQ, &kernel);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, true, *pCmdQ, nullptr);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, &kernel);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, true, *pCmdQ, &kernel);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
bool retVal = false;
|
||||
|
@ -576,9 +581,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
|
|||
typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;
|
||||
typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT;
|
||||
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
|
||||
pCmdQ->setPerfCountersEnabled(true, 1);
|
||||
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS();
|
||||
uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + KernelCommandsHelper<FamilyType>::getSizeRequiredCS(&kernel);
|
||||
requiredSize += 2 * sizeof(GPGPU_WALKER);
|
||||
|
||||
//begin perf cmds
|
||||
|
@ -586,13 +593,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
|
|||
//end perf cmds
|
||||
requiredSize += 2 * sizeof(PIPE_CONTROL) + 3 * sizeof(MI_STORE_REGISTER_MEM) + OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_REPORT_PERF_COUNT) + pCmdQ->getPerfCountersUserRegistersNumber() * sizeof(MI_STORE_REGISTER_MEM);
|
||||
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
DispatchInfo dispatchInfo;
|
||||
dispatchInfo.setKernel(&kernel);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, &kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, 0, true, true, *pCmdQ, multiDispatchInfo);
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
|
|
|
@ -101,3 +101,4 @@ EnableMakeResidentOnMapGpuVa = 0
|
|||
RenderCompressedImagesEnabled = -1
|
||||
RenderCompressedBuffersEnabled = -1
|
||||
AUBDumpForceAllToLocalMemory = 0
|
||||
EnableCacheFlushAfterWalker = 0
|
||||
|
|
Loading…
Reference in New Issue