fix: correct alignment of per-thread scratch size
Related-To: NEO-5288
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
40af0dddeb
commit
1c0285a156
|
@ -468,7 +468,7 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi
|
||||||
EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation));
|
EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation));
|
||||||
|
|
||||||
// Enqueue With ScratchSize bigger than previous
|
// Enqueue With ScratchSize bigger than previous
|
||||||
scratchSize = 8196;
|
scratchSize = 8192;
|
||||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||||
|
|
||||||
enqueueKernel<FamilyType, false>(mockKernel);
|
enqueueKernel<FamilyType, false>(mockKernel);
|
||||||
|
|
|
@ -950,7 +950,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeSto
|
||||||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||||
ASSERT_NE(scratchAllocation, nullptr);
|
ASSERT_NE(scratchAllocation, nullptr);
|
||||||
|
|
||||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
commandStreamReceiver->setRequiredScratchSizes(8192, 0); // whatever > first size
|
||||||
|
|
||||||
flushTask(*commandStreamReceiver); // 2nd flush
|
flushTask(*commandStreamReceiver); // 2nd flush
|
||||||
|
|
||||||
|
@ -984,7 +984,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeSto
|
||||||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||||
ASSERT_NE(scratchAllocation, nullptr);
|
ASSERT_NE(scratchAllocation, nullptr);
|
||||||
|
|
||||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
commandStreamReceiver->setRequiredScratchSizes(8192, 0); // whatever > first size
|
||||||
|
|
||||||
flushTask(*commandStreamReceiver); // 2nd flush
|
flushTask(*commandStreamReceiver); // 2nd flush
|
||||||
|
|
||||||
|
|
|
@ -215,19 +215,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
||||||
EXPECT_FALSE(cfeStateDirty);
|
EXPECT_FALSE(cfeStateDirty);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedTo64) {
|
|
||||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
|
||||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
|
|
||||||
|
|
||||||
uint32_t perThreadScratchSize = 1;
|
|
||||||
uint32_t expectedValue = 1 << 6;
|
|
||||||
bool stateBaseAddressDirty = false;
|
|
||||||
bool cfeStateDirty = false;
|
|
||||||
uint8_t surfaceHeap[1000];
|
|
||||||
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
|
||||||
EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndScratchAllocationExistsThenSetDirtyBitCopyCurrentState) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndScratchAllocationExistsThenSetDirtyBitCopyCurrentState) {
|
||||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
|
@ -584,25 +571,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
||||||
EXPECT_EQ(32u, scratchController->stateSlotsCount);
|
EXPECT_EQ(32u, scratchController->stateSlotsCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItTo64) {
|
|
||||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
|
||||||
DebugManagerStateRestore restorer;
|
|
||||||
DebugManager.flags.EnablePrivateScratchSlot1.set(1);
|
|
||||||
RENDER_SURFACE_STATE surfaceState[4];
|
|
||||||
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
|
||||||
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());
|
|
||||||
|
|
||||||
uint32_t misalignedSizeForPrivateScratch = MemoryConstants::pageSize + 1;
|
|
||||||
|
|
||||||
bool cfeStateDirty = false;
|
|
||||||
bool stateBaseAddressDirty = false;
|
|
||||||
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u,
|
|
||||||
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
|
||||||
EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
|
||||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, alignUp(misalignedSizeForPrivateScratch, 64) * scratchController->computeUnitsUsedForScratch);
|
|
||||||
EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
|
|
||||||
}
|
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenSizeForPrivateScratchSpaceIsProvidedThenItIsNotCreated) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenSizeForPrivateScratchSpaceIsProvidedThenItIsNotCreated) {
|
||||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "shared/source/execution_environment/root_device_environment.h"
|
#include "shared/source/execution_environment/root_device_environment.h"
|
||||||
#include "shared/source/helpers/aligned_memory.h"
|
#include "shared/source/helpers/aligned_memory.h"
|
||||||
#include "shared/source/helpers/api_specific_config.h"
|
#include "shared/source/helpers/api_specific_config.h"
|
||||||
|
#include "shared/source/helpers/basic_math.h"
|
||||||
#include "shared/source/helpers/constants.h"
|
#include "shared/source/helpers/constants.h"
|
||||||
#include "shared/source/helpers/gfx_core_helper.h"
|
#include "shared/source/helpers/gfx_core_helper.h"
|
||||||
#include "shared/source/memory_manager/allocation_properties.h"
|
#include "shared/source/memory_manager/allocation_properties.h"
|
||||||
|
@ -158,7 +159,10 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
||||||
bool &stateBaseAddressDirty,
|
bool &stateBaseAddressDirty,
|
||||||
bool &scratchSurfaceDirty,
|
bool &scratchSurfaceDirty,
|
||||||
bool &vfeStateDirty) {
|
bool &vfeStateDirty) {
|
||||||
uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64);
|
uint32_t requiredPerThreadScratchSizeAlignedUp = requiredPerThreadScratchSize;
|
||||||
|
if (!Math::isPow2(requiredPerThreadScratchSizeAlignedUp)) {
|
||||||
|
requiredPerThreadScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSize);
|
||||||
|
}
|
||||||
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||||
scratchSurfaceDirty = false;
|
scratchSurfaceDirty = false;
|
||||||
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
|
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
|
||||||
|
@ -174,7 +178,10 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
||||||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||||
}
|
}
|
||||||
if (privateScratchSpaceSupported) {
|
if (privateScratchSpaceSupported) {
|
||||||
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64);
|
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = requiredPerThreadPrivateScratchSize;
|
||||||
|
if (!Math::isPow2(requiredPerThreadPrivateScratchSizeAlignedUp)) {
|
||||||
|
requiredPerThreadPrivateScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadPrivateScratchSize);
|
||||||
|
}
|
||||||
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
size_t requiredPrivateScratchSizeInBytes = static_cast<size_t>(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch;
|
||||||
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
|
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
|
||||||
if (privateScratchAllocation) {
|
if (privateScratchAllocation) {
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||||
#include "shared/source/gmm_helper/page_table_mngr.h"
|
#include "shared/source/gmm_helper/page_table_mngr.h"
|
||||||
#include "shared/source/helpers/api_specific_config.h"
|
#include "shared/source/helpers/api_specific_config.h"
|
||||||
|
#include "shared/source/helpers/basic_math.h"
|
||||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||||
#include "shared/source/memory_manager/surface.h"
|
#include "shared/source/memory_manager/surface.h"
|
||||||
|
@ -41,6 +42,7 @@
|
||||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
#include "shared/test/common/mocks/mock_internal_allocation_storage.h"
|
#include "shared/test/common/mocks/mock_internal_allocation_storage.h"
|
||||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||||
|
#include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h"
|
||||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||||
#include "shared/test/common/test_macros/hw_test.h"
|
#include "shared/test/common/test_macros/hw_test.h"
|
||||||
|
@ -4174,3 +4176,36 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
||||||
|
|
||||||
EXPECT_TRUE(commandStreamReceiver.isMadeResident(sipAllocation));
|
EXPECT_TRUE(commandStreamReceiver.isMadeResident(sipAllocation));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedNextPow2) {
|
||||||
|
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
|
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
|
||||||
|
|
||||||
|
uint32_t perThreadScratchSize = 65;
|
||||||
|
uint32_t expectedValue = Math::nextPowerOfTwo(perThreadScratchSize);
|
||||||
|
bool stateBaseAddressDirty = false;
|
||||||
|
bool cfeStateDirty = false;
|
||||||
|
uint8_t surfaceHeap[1000];
|
||||||
|
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||||
|
EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItNextPow2) {
|
||||||
|
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnablePrivateScratchSlot1.set(1);
|
||||||
|
RENDER_SURFACE_STATE surfaceState[4];
|
||||||
|
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
|
auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());
|
||||||
|
|
||||||
|
uint32_t misalignedSizeForPrivateScratch = MemoryConstants::pageSize + 1;
|
||||||
|
uint32_t alignedSizeForPrivateScratch = Math::nextPowerOfTwo(misalignedSizeForPrivateScratch);
|
||||||
|
|
||||||
|
bool cfeStateDirty = false;
|
||||||
|
bool stateBaseAddressDirty = false;
|
||||||
|
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u,
|
||||||
|
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||||
|
EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||||
|
EXPECT_EQ(scratchController->privateScratchSizeBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||||
|
EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue