fix: Align IOH entry

Related-To: NEO-10036

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-02-26 12:10:21 +00:00
committed by Compute-Runtime-Automation
parent e16b5378da
commit cfd3edfb2c
8 changed files with 22 additions and 7 deletions

View File

@@ -1000,7 +1000,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper); implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup(); auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize); EXPECT_EQ(indirectHeap->getUsed(), alignUp(sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize, gfxCoreHelper.getIOHAlignment()));
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress(); expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2023 Intel Corporation * Copyright (C) 2019-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -327,6 +327,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal), auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
WalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); WalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
walkerCmd->setIndirectDataLength(indirectDataLength); walkerCmd->setIndirectDataLength(indirectDataLength);
ioh.align(kernel.getGfxCoreHelper().getIOHAlignment());
} }
return offsetCrossThreadData; return offsetCrossThreadData;

View File

@@ -343,6 +343,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
} else { } else {
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData)); walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData); walkerCmd.setIndirectDataLength(sizeThreadData);
container.getIndirectHeap(HeapType::indirectObject)->align(rootDeviceEnvironment.getHelper<GfxCoreHelper>().getIOHAlignment());
} }
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd, EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,

View File

@@ -574,6 +574,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSemaphoreDelayBetweenWaits, -1, "Specifies
DECLARE_DEBUG_VARIABLE(int32_t, ForceLocalMemoryAccessMode, -1, "-1: don't override, 0: default rules apply, 1: CPU can access local memory, 3: CPU never accesses local memory") DECLARE_DEBUG_VARIABLE(int32_t, ForceLocalMemoryAccessMode, -1, "-1: don't override, 0: default rules apply, 1: CPU can access local memory, 3: CPU never accesses local memory")
DECLARE_DEBUG_VARIABLE(int32_t, ForceUserptrAlignment, -1, "-1: no force (4kb), >0: n kb alignment") DECLARE_DEBUG_VARIABLE(int32_t, ForceUserptrAlignment, -1, "-1: no force (4kb), >0: n kb alignment")
DECLARE_DEBUG_VARIABLE(int32_t, ForceCommandBufferAlignment, -1, "-1: no force (64kb), >0: n kb alignment") DECLARE_DEBUG_VARIABLE(int32_t, ForceCommandBufferAlignment, -1, "-1: no force (64kb), >0: n kb alignment")
DECLARE_DEBUG_VARIABLE(int32_t, ForceIOHAlignment, -1, "-1: no force, >0: n byte alignment")
DECLARE_DEBUG_VARIABLE(int32_t, ForceDefaultHeapSize, -1, "-1: no force (64kb), >0: n kb size") DECLARE_DEBUG_VARIABLE(int32_t, ForceDefaultHeapSize, -1, "-1: no force (64kb), >0: n kb size")
DECLARE_DEBUG_VARIABLE(int32_t, PreferCopyEngineForCopyBufferToBuffer, -1, "-1: default, 0: prefer EUs, 1: prefer blitter") DECLARE_DEBUG_VARIABLE(int32_t, PreferCopyEngineForCopyBufferToBuffer, -1, "-1: default, 0: prefer EUs, 1: prefer blitter")
DECLARE_DEBUG_VARIABLE(int64_t, ForceSystemMemoryPlacement, 0, "0: default, >0: (bitmask) for given Graphics Allocation Type, force system memory placement") DECLARE_DEBUG_VARIABLE(int64_t, ForceSystemMemoryPlacement, 0, "0: default, >0: (bitmask) for given Graphics Allocation Type, force system memory placement")

View File

@@ -159,6 +159,7 @@ class GfxCoreHelper {
virtual size_t getMax3dImageWidthOrHeight() const = 0; virtual size_t getMax3dImageWidthOrHeight() const = 0;
virtual uint64_t getMaxMemAllocSize() const = 0; virtual uint64_t getMaxMemAllocSize() const = 0;
virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0; virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0;
virtual size_t getIOHAlignment() const = 0;
virtual bool isStatelessToStatefulWithOffsetSupported() const = 0; virtual bool isStatelessToStatefulWithOffsetSupported() const = 0;
virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const = 0; virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const = 0;
virtual bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const = 0; virtual bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
@@ -383,6 +384,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
size_t getMax3dImageWidthOrHeight() const override; size_t getMax3dImageWidthOrHeight() const override;
uint64_t getMaxMemAllocSize() const override; uint64_t getMaxMemAllocSize() const override;
uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override; uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
size_t getIOHAlignment() const override;
bool isStatelessToStatefulWithOffsetSupported() const override; bool isStatelessToStatefulWithOffsetSupported() const override;
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override; void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override;
bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const override; bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const override;

View File

@@ -668,6 +668,11 @@ uint64_t GfxCoreHelperHw<GfxFamily>::getPatIndex(CacheRegion cacheRegion, CacheP
return -1; return -1;
} }
// Returns the alignment, in bytes, that callers pass to IndirectHeap::align()
// for the indirect object heap (IOH) once a kernel's indirect data has been
// programmed.
//
// The ForceIOHAlignment debug variable ("-1: no force, >0: n byte alignment")
// overrides the default when it holds a positive value. Non-positive values
// are treated as "no force" so that a misconfigured 0 can never be handed to
// the alignment helpers as a divisor/mask.
//
// Default: 1 byte, i.e. no extra padding is requested.
template <typename GfxFamily>
size_t GfxCoreHelperHw<GfxFamily>::getIOHAlignment() const {
    const auto forcedAlignment = debugManager.flags.ForceIOHAlignment.get();
    if (forcedAlignment > 0) {
        return static_cast<size_t>(forcedAlignment);
    }
    return 1;
}
template <typename GfxFamily> template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const { bool GfxCoreHelperHw<GfxFamily>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) { if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {

View File

@@ -279,6 +279,7 @@ OverrideSlmSize = -1
UseCyclesPerSecondTimer = 0 UseCyclesPerSecondTimer = 0
PrintOsContextInitializations = 0 PrintOsContextInitializations = 0
WaitLoopCount = -1 WaitLoopCount = -1
ForceIOHAlignment = -1
DebuggerLogBitmask = 0 DebuggerLogBitmask = 0
GTPinAllocateBufferInSharedMemory = -1 GTPinAllocateBufferInSharedMemory = -1
DeferOsContextInitialization = -1 DeferOsContextInitialization = -1

View File

@@ -592,9 +592,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredWhe
EXPECT_EQ(1u, cmd->getEmitInlineParameter()); EXPECT_EQ(1u, cmd->getEmitInlineParameter());
const uint32_t inlineDataSize = sizeof(InlineData); const uint32_t inlineDataSize = sizeof(InlineData);
size_t expectedSizeIOH = dispatchInterface->getCrossThreadDataSize() + size_t expectedSizeIOH = alignUp(dispatchInterface->getCrossThreadDataSize() +
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup() - dispatchInterface->getPerThreadDataSizeForWholeThreadGroup() -
inlineDataSize; inlineDataSize,
this->getHelper<GfxCoreHelper>().getIOHAlignment());
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject); auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
EXPECT_EQ(expectedSizeIOH, heap->getUsed()); EXPECT_EQ(expectedSizeIOH, heap->getUsed());
} }
@@ -619,8 +620,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredIsF
auto cmd = genCmdCast<DefaultWalkerType *>(*itor); auto cmd = genCmdCast<DefaultWalkerType *>(*itor);
EXPECT_EQ(0u, cmd->getEmitInlineParameter()); EXPECT_EQ(0u, cmd->getEmitInlineParameter());
size_t expectedSizeIOH = dispatchInterface->getCrossThreadDataSize() + size_t expectedSizeIOH = alignUp(dispatchInterface->getCrossThreadDataSize() +
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(); dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(),
this->getHelper<GfxCoreHelper>().getIOHAlignment());
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject); auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
EXPECT_EQ(expectedSizeIOH, heap->getUsed()); EXPECT_EQ(expectedSizeIOH, heap->getUsed());
} }