fix: correct alignment of per-thread scratch size
Related-To: NEO-5288
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
40af0dddeb
commit
1c0285a156
|
@ -468,7 +468,7 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi
|
|||
EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation));
|
||||
|
||||
// Enqueue With ScratchSize bigger than previous
|
||||
scratchSize = 8196;
|
||||
scratchSize = 8192;
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
enqueueKernel<FamilyType, false>(mockKernel);
|
||||
|
|
|
@ -950,7 +950,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeSto
|
|||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||
ASSERT_NE(scratchAllocation, nullptr);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
||||
commandStreamReceiver->setRequiredScratchSizes(8192, 0); // whatever > first size
|
||||
|
||||
flushTask(*commandStreamReceiver); // 2nd flush
|
||||
|
||||
|
@ -984,7 +984,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeSto
|
|||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||
ASSERT_NE(scratchAllocation, nullptr);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
||||
commandStreamReceiver->setRequiredScratchSizes(8192, 0); // whatever > first size
|
||||
|
||||
flushTask(*commandStreamReceiver); // 2nd flush
|
||||
|
||||
|
|
|
@ -215,19 +215,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
|||
EXPECT_FALSE(cfeStateDirty);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedTo64) {
|
||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
|
||||
|
||||
uint32_t perThreadScratchSize = 1;
|
||||
uint32_t expectedValue = 1 << 6;
|
||||
bool stateBaseAddressDirty = false;
|
||||
bool cfeStateDirty = false;
|
||||
uint8_t surfaceHeap[1000];
|
||||
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||
EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndScratchAllocationExistsThenSetDirtyBitCopyCurrentState) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
|
@ -584,25 +571,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
|||
EXPECT_EQ(32u, scratchController->stateSlotsCount);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItTo64) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnablePrivateScratchSlot1.set(1);
|
||||
RENDER_SURFACE_STATE surfaceState[4];
|
||||
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());
|
||||
|
||||
uint32_t misalignedSizeForPrivateScratch = MemoryConstants::pageSize + 1;
|
||||
|
||||
bool cfeStateDirty = false;
|
||||
bool stateBaseAddressDirty = false;
|
||||
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u,
|
||||
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||
EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, alignUp(misalignedSizeForPrivateScratch, 64) * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenSizeForPrivateScratchSpaceIsProvidedThenItIsNotCreated) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
DebugManagerStateRestore restorer;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
|
@ -158,7 +159,10 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
|||
bool &stateBaseAddressDirty,
|
||||
bool &scratchSurfaceDirty,
|
||||
bool &vfeStateDirty) {
|
||||
uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64);
|
||||
uint32_t requiredPerThreadScratchSizeAlignedUp = requiredPerThreadScratchSize;
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeAlignedUp)) {
|
||||
requiredPerThreadScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSize);
|
||||
}
|
||||
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||
scratchSurfaceDirty = false;
|
||||
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
|
||||
|
@ -174,7 +178,10 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
|||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
}
|
||||
if (privateScratchSpaceSupported) {
|
||||
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64);
|
||||
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = requiredPerThreadPrivateScratchSize;
|
||||
if (!Math::isPow2(requiredPerThreadPrivateScratchSizeAlignedUp)) {
|
||||
requiredPerThreadPrivateScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadPrivateScratchSize);
|
||||
}
|
||||
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
|
||||
if (privateScratchAllocation) {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/page_table_mngr.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
|
@ -41,6 +42,7 @@
|
|||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_internal_allocation_storage.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
@ -4174,3 +4176,36 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
|||
|
||||
EXPECT_TRUE(commandStreamReceiver.isMadeResident(sipAllocation));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedNextPow2) {
|
||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
|
||||
|
||||
uint32_t perThreadScratchSize = 65;
|
||||
uint32_t expectedValue = Math::nextPowerOfTwo(perThreadScratchSize);
|
||||
bool stateBaseAddressDirty = false;
|
||||
bool cfeStateDirty = false;
|
||||
uint8_t surfaceHeap[1000];
|
||||
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||
EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItNextPow2) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnablePrivateScratchSlot1.set(1);
|
||||
RENDER_SURFACE_STATE surfaceState[4];
|
||||
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());
|
||||
|
||||
uint32_t misalignedSizeForPrivateScratch = MemoryConstants::pageSize + 1;
|
||||
uint32_t alignedSizeForPrivateScratch = Math::nextPowerOfTwo(misalignedSizeForPrivateScratch);
|
||||
|
||||
bool cfeStateDirty = false;
|
||||
bool stateBaseAddressDirty = false;
|
||||
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u,
|
||||
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||
EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue