mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
fix: Align IOH entry
Related-To: NEO-10036
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e16b5378da
commit
cfd3edfb2c
@@ -1000,7 +1000,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
|||||||
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
||||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||||
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
|
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
|
||||||
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize);
|
EXPECT_EQ(indirectHeap->getUsed(), alignUp(sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize, gfxCoreHelper.getIOHAlignment()));
|
||||||
|
|
||||||
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||||
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress();
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2019-2023 Intel Corporation
|
* Copyright (C) 2019-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -327,6 +327,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||||||
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
|
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
|
||||||
WalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
WalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||||
walkerCmd->setIndirectDataLength(indirectDataLength);
|
walkerCmd->setIndirectDataLength(indirectDataLength);
|
||||||
|
|
||||||
|
ioh.align(kernel.getGfxCoreHelper().getIOHAlignment());
|
||||||
}
|
}
|
||||||
|
|
||||||
return offsetCrossThreadData;
|
return offsetCrossThreadData;
|
||||||
|
|||||||
@@ -343,6 +343,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
} else {
|
} else {
|
||||||
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||||
walkerCmd.setIndirectDataLength(sizeThreadData);
|
walkerCmd.setIndirectDataLength(sizeThreadData);
|
||||||
|
|
||||||
|
container.getIndirectHeap(HeapType::indirectObject)->align(rootDeviceEnvironment.getHelper<GfxCoreHelper>().getIOHAlignment());
|
||||||
}
|
}
|
||||||
|
|
||||||
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,
|
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,
|
||||||
|
|||||||
@@ -574,6 +574,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSemaphoreDelayBetweenWaits, -1, "Specifies
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceLocalMemoryAccessMode, -1, "-1: don't override, 0: default rules apply, 1: CPU can access local memory, 3: CPU never accesses local memory")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceLocalMemoryAccessMode, -1, "-1: don't override, 0: default rules apply, 1: CPU can access local memory, 3: CPU never accesses local memory")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceUserptrAlignment, -1, "-1: no force (4kb), >0: n kb alignment")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceUserptrAlignment, -1, "-1: no force (4kb), >0: n kb alignment")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCommandBufferAlignment, -1, "-1: no force (64kb), >0: n kb alignment")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceCommandBufferAlignment, -1, "-1: no force (64kb), >0: n kb alignment")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceIOHAlignment, -1, "-1: no force, >0: n byte alignment")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceDefaultHeapSize, -1, "-1: no force (64kb), >0: n kb size")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceDefaultHeapSize, -1, "-1: no force (64kb), >0: n kb size")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, PreferCopyEngineForCopyBufferToBuffer, -1, "-1: default, 0: prefer EUs, 1: prefer blitter")
|
DECLARE_DEBUG_VARIABLE(int32_t, PreferCopyEngineForCopyBufferToBuffer, -1, "-1: default, 0: prefer EUs, 1: prefer blitter")
|
||||||
DECLARE_DEBUG_VARIABLE(int64_t, ForceSystemMemoryPlacement, 0, "0: default, >0: (bitmask) for given Graphics Allocation Type, force system memory placement")
|
DECLARE_DEBUG_VARIABLE(int64_t, ForceSystemMemoryPlacement, 0, "0: default, >0: (bitmask) for given Graphics Allocation Type, force system memory placement")
|
||||||
|
|||||||
@@ -159,6 +159,7 @@ class GfxCoreHelper {
|
|||||||
virtual size_t getMax3dImageWidthOrHeight() const = 0;
|
virtual size_t getMax3dImageWidthOrHeight() const = 0;
|
||||||
virtual uint64_t getMaxMemAllocSize() const = 0;
|
virtual uint64_t getMaxMemAllocSize() const = 0;
|
||||||
virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0;
|
virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0;
|
||||||
|
virtual size_t getIOHAlignment() const = 0;
|
||||||
virtual bool isStatelessToStatefulWithOffsetSupported() const = 0;
|
virtual bool isStatelessToStatefulWithOffsetSupported() const = 0;
|
||||||
virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const = 0;
|
virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const = 0;
|
||||||
virtual bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
virtual bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||||
@@ -383,6 +384,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
|||||||
size_t getMax3dImageWidthOrHeight() const override;
|
size_t getMax3dImageWidthOrHeight() const override;
|
||||||
uint64_t getMaxMemAllocSize() const override;
|
uint64_t getMaxMemAllocSize() const override;
|
||||||
uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
|
uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
|
||||||
|
size_t getIOHAlignment() const override;
|
||||||
bool isStatelessToStatefulWithOffsetSupported() const override;
|
bool isStatelessToStatefulWithOffsetSupported() const override;
|
||||||
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override;
|
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override;
|
||||||
bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
bool platformSupportsImplicitScaling(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||||
|
|||||||
@@ -668,6 +668,11 @@ uint64_t GfxCoreHelperHw<GfxFamily>::getPatIndex(CacheRegion cacheRegion, CacheP
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic GfxCoreHelperHw implementation of getIOHAlignment(): returns 1.
// Other hunks of this change pass the returned value to the indirect object
// heap's align() call after the walker's indirect data is programmed, and to
// alignUp() when tests compute the expected heap usage.
// NOTE(review): the ForceIOHAlignment debug variable declared in this change
// is not read here — confirm where it is consumed (possibly in a
// family-specific specialization that is not visible in this view).
template <typename GfxFamily>
|
||||||
|
size_t GfxCoreHelperHw<GfxFamily>::getIOHAlignment() const {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
bool GfxCoreHelperHw<GfxFamily>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
|
bool GfxCoreHelperHw<GfxFamily>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
|
||||||
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {
|
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {
|
||||||
|
|||||||
@@ -279,6 +279,7 @@ OverrideSlmSize = -1
|
|||||||
UseCyclesPerSecondTimer = 0
|
UseCyclesPerSecondTimer = 0
|
||||||
PrintOsContextInitializations = 0
|
PrintOsContextInitializations = 0
|
||||||
WaitLoopCount = -1
|
WaitLoopCount = -1
|
||||||
|
ForceIOHAlignment = -1
|
||||||
DebuggerLogBitmask = 0
|
DebuggerLogBitmask = 0
|
||||||
GTPinAllocateBufferInSharedMemory = -1
|
GTPinAllocateBufferInSharedMemory = -1
|
||||||
DeferOsContextInitialization = -1
|
DeferOsContextInitialization = -1
|
||||||
|
|||||||
@@ -592,9 +592,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredWhe
|
|||||||
EXPECT_EQ(1u, cmd->getEmitInlineParameter());
|
EXPECT_EQ(1u, cmd->getEmitInlineParameter());
|
||||||
|
|
||||||
const uint32_t inlineDataSize = sizeof(InlineData);
|
const uint32_t inlineDataSize = sizeof(InlineData);
|
||||||
size_t expectedSizeIOH = dispatchInterface->getCrossThreadDataSize() +
|
size_t expectedSizeIOH = alignUp(dispatchInterface->getCrossThreadDataSize() +
|
||||||
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup() -
|
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup() -
|
||||||
inlineDataSize;
|
inlineDataSize,
|
||||||
|
this->getHelper<GfxCoreHelper>().getIOHAlignment());
|
||||||
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
|
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
|
||||||
EXPECT_EQ(expectedSizeIOH, heap->getUsed());
|
EXPECT_EQ(expectedSizeIOH, heap->getUsed());
|
||||||
}
|
}
|
||||||
@@ -619,8 +620,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredIsF
|
|||||||
auto cmd = genCmdCast<DefaultWalkerType *>(*itor);
|
auto cmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||||
EXPECT_EQ(0u, cmd->getEmitInlineParameter());
|
EXPECT_EQ(0u, cmd->getEmitInlineParameter());
|
||||||
|
|
||||||
size_t expectedSizeIOH = dispatchInterface->getCrossThreadDataSize() +
|
size_t expectedSizeIOH = alignUp(dispatchInterface->getCrossThreadDataSize() +
|
||||||
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();
|
dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(),
|
||||||
|
this->getHelper<GfxCoreHelper>().getIOHAlignment());
|
||||||
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
|
auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject);
|
||||||
EXPECT_EQ(expectedSizeIOH, heap->getUsed());
|
EXPECT_EQ(expectedSizeIOH, heap->getUsed());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user