Enable L1 cache for Tigerlake

Change-Id: I33513ed084f9d06ceca11315cac03f1b682db535
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
Related-To: NEO-4832
This commit is contained in:
Maciej Dziuban
2020-10-01 12:22:54 +02:00
committed by sys_ocldev
parent ec054a87da
commit 138f04bdcd
19 changed files with 352 additions and 12 deletions

View File

@ -194,10 +194,10 @@ struct EncodeSurfaceState {
using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper);
static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
uint32_t numAvailableDevices);
bool isReadOnly, uint32_t numAvailableDevices);
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {
return ~(getSurfaceBaseAddressAlignment() - 1);

View File

@ -267,7 +267,7 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
template <typename Family>
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, uint32_t numAvailableDevices,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(size));
@ -306,7 +306,7 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, numAvailableDevices);
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices);
}
template <typename Family>

View File

@ -391,7 +391,7 @@ inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &devic
template <typename GfxFamily>
void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
uint32_t numAvailableDevices) {
bool isReadOnly, uint32_t numAvailableDevices) {
}
} // namespace NEO

View File

@ -55,6 +55,7 @@ DECLARE_DEBUG_VARIABLE(bool, ZebinAppendElws, false, "Append crossthread data wi
DECLARE_DEBUG_VARIABLE(bool, ZebinIgnoreIcbeVersion, false, "Ignore IGC\'s ICBE version")
DECLARE_DEBUG_VARIABLE(bool, UseExternalAllocatorForSshAndDsh, false, "Use 32 bit external Allocator for ssh and dsh in Level Zero")
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
DECLARE_DEBUG_VARIABLE(int32_t, ForceL1Caching, -1, "-1: default, 0: disable, 1: enable, When set to true driver will program L1 cache policy for surface state and stateless accesses")
DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationEnabled, -1, "-1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerGWS, 0, "Forces gws of scheduler kernel, only multiple of 24 allowed or 0 - default selected")

View File

@ -47,6 +47,24 @@ void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStre
}
}
// Family-specific buffer surface state tweak: may replace the regular OCL buffer
// MOCS with the GMM_RESOURCE_USAGE_OCL_BUFFER_CONST MOCS.
//
// surfaceState        - surface state inspected and possibly updated in place
// allocation          - backing allocation; may be nullptr
// gmmHelper           - source of the MOCS values
// isReadOnly          - caller's hint that the buffer is read-only
// numAvailableDevices - not used by this specialization
template <>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
                                                         bool isReadOnly, uint32_t numAvailableDevices) {
    // Nothing to do unless the surface currently carries the regular OCL buffer MOCS.
    if (surfaceState->getMemoryObjectControlState() != gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)) {
        return;
    }

    const bool constantSurface = (allocation != nullptr) &&
                                 (allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE);

    // ForceL1Caching == -1 keeps the default heuristic; any other value overrides it.
    const auto l1Override = DebugManager.flags.ForceL1Caching.get();
    const bool cacheInL1 = (l1Override == -1) ? (isReadOnly || constantSurface) : (l1Override != 0);

    if (cacheInL1) {
        surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    }
}
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@ -240,6 +240,23 @@ bool HwHelperHw<Family>::useOnlyGlobalTimestamps() const {
return true;
}
// Returns the MOCS value for the requested caching configuration, shifted right by one.
//
// l3enabled == false -> GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED
// l3enabled == true  -> GMM_RESOURCE_USAGE_OCL_BUFFER_CONST when L1 is requested,
//                       GMM_RESOURCE_USAGE_OCL_BUFFER otherwise
// The ForceL1Caching debug flag (when not -1) overrides l1enabled, but only on the L3 path.
template <>
uint32_t HwHelperHw<Family>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
    if (!l3enabled) {
        return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
    }

    const auto l1Override = DebugManager.flags.ForceL1Caching.get();
    const bool cacheInL1 = (l1Override == -1) ? l1enabled : (l1Override != 0);

    return gmmHelper.getMOCS(cacheInL1 ? GMM_RESOURCE_USAGE_OCL_BUFFER_CONST : GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
}
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;
template struct MemorySynchronizationCommands<Family>;

View File

@ -47,7 +47,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr
length.Length = static_cast<uint32_t>(allocSize - 1);
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, 1u,
false, false, false, 1u,
&allocation, pDevice->getGmmHelper());
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
@ -74,7 +74,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo
length.Length = static_cast<uint32_t>(allocSize - 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
true, false, 1u,
true, false, false, 1u,
nullptr, pDevice->getGmmHelper());
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());
@ -100,7 +100,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency
length.Length = static_cast<uint32_t>(allocSize - 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, 1u,
false, false, false, 1u,
nullptr, pDevice->getGmmHelper());
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());