diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index f708194f70..eac9f08e0f 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -161,7 +161,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, whenProgra EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode()); } -HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenScratchAllocationRequestedThenProgramCfeStateWithScratchAllocation) { +HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenSratchAllocationRequestedThenProgramCfeStateWithScratchAllocation) { using CFE_STATE = typename FamilyType::CFE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; @@ -179,10 +179,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x1000; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t computeUnits = hwHelper.getComputeUnitsUsedForScratch(&hwInfo); - - auto maxSubSlice = HwInfoConfig::get(hwInfo.platform.eProductFamily)->computeMaxNeededSubSliceSpace(hwInfo); - auto maxHwThreadCount = maxSubSlice * hwInfo.gtSystemInfo.MaxEuPerSubSlice * (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); - size_t scratchSpaceSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * computeUnits; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); @@ -203,7 +199,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, scratchState->getSurfaceType()); SURFACE_STATE_BUFFER_LENGTH length = {0}; - length.Length = static_cast(maxHwThreadCount - 1); + length.Length = static_cast(computeUnits - 1); EXPECT_EQ(length.SurfaceState.Depth + 1u, scratchState->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, scratchState->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, scratchState->getHeight()); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp index 9a042532e0..6e7393c62a 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp @@ -16,12 +16,9 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/ult_aub_command_stream_receiver.h" -#include "shared/test/common/mocks/mock_csr.h" -#include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" -#include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" @@ -314,76 +311,3 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent } alignedFree(svmPtr); } - -using PvcCommandStreamReceiverTests = ::testing::Test; -PVCTEST_F(PvcCommandStreamReceiverTests, givenScratchSpaceRequiredWhenScratchSurfaceStateIsProgrammedThenSizeDoesNotExceedMaxThreadCount) { - using CFE_STATE = typename FamilyType::CFE_STATE; - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - - HardwareInfo hwInfo = *defaultHwInfo; - - int revisions[] = {0, 3}; - int expectComputeUnitsEqualMaxThreadCount[] = {true, false}; - - for (auto i = 0; i < 2; i++) { - hwInfo.platform.usRevId = revisions[i]; - auto pDevice = MockClDevice::createWithNewExecutionEnvironment(&hwInfo, 0); - ASSERT_NE(nullptr, pDevice); - auto pClDevice = std::make_unique(pDevice); - - size_t GWS = 1; - MockContext ctx(pClDevice.get()); - MockKernelWithInternals kernel(*pClDevice); - CommandQueueHw commandQueue(&ctx, pClDevice.get(), 0, false); - auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - auto scratchController = static_cast(commandStreamReceiver->getScratchSpaceController()); - scratchController->slotId = 2u; - pDevice->resetCommandStreamReceiver(commandStreamReceiver); - auto &commandStreamCSR = commandStreamReceiver->getCS(); - - kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x1000; - auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - uint32_t computeUnits = hwHelper.getComputeUnitsUsedForScratch(&hwInfo); - - auto maxSubSlice = HwInfoConfig::get(hwInfo.platform.eProductFamily)->computeMaxNeededSubSliceSpace(hwInfo); - auto maxHwThreadCount = maxSubSlice * hwInfo.gtSystemInfo.MaxEuPerSubSlice * (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); - - if (expectComputeUnitsEqualMaxThreadCount[i]) { - EXPECT_EQ(maxHwThreadCount, computeUnits); - } else { - EXPECT_NE(maxHwThreadCount, computeUnits); - } - - size_t scratchSpaceSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * computeUnits; - - commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); - commandQueue.flush(); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, - ptrOffset(commandStreamCSR.getCpuBase(), 0), - commandStreamCSR.getUsed())); - - auto itor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itor); - - CFE_STATE *cfeState = genCmdCast(*itor); - ASSERT_NE(nullptr, cfeState); - - EXPECT_EQ(scratchSpaceSize, scratchController->getScratchSpaceAllocation()->getUnderlyingBufferSize()); - - uint32_t bufferOffset = static_cast(scratchController->slotId * scratchController->singleSurfaceStateSize * 2); - EXPECT_EQ(bufferOffset, cfeState->getScratchSpaceBuffer()); - RENDER_SURFACE_STATE *scratchState = reinterpret_cast(scratchController->surfaceStateHeap + bufferOffset); - EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), scratchState->getSurfaceBaseAddress()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, scratchState->getSurfaceType()); - - SURFACE_STATE_BUFFER_LENGTH length = {0}; - length.Length = static_cast(maxHwThreadCount - 1); - EXPECT_EQ(length.SurfaceState.Depth + 1u, scratchState->getDepth()); - EXPECT_EQ(length.SurfaceState.Width + 1u, scratchState->getWidth()); - EXPECT_EQ(length.SurfaceState.Height + 1u, scratchState->getHeight()); - EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], scratchState->getSurfacePitch()); - } -} diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp index c60840b1f1..78d1ad92e7 100644 --- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp +++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp @@ -80,18 +80,13 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() { } void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) { - const auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); - auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + auto &hwHelper = HwHelper::get(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); uint64_t scratchAllocationAddress = 0u; if (scratchAllocation) { scratchAllocationAddress = scratchAllocation->getGpuAddress(); } - - auto maxSubSlice = HwInfoConfig::get(hwInfo->platform.eProductFamily)->computeMaxNeededSubSliceSpace(*hwInfo); - auto maxHwThreadCount = maxSubSlice * hwInfo->gtSystemInfo.MaxEuPerSubSlice * (hwInfo->gtSystemInfo.ThreadCount / hwInfo->gtSystemInfo.EUCount); - hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], - surfaceStateForScratchAllocation, maxHwThreadCount, scratchAllocationAddress, 0, + surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0, perThreadScratchSize, nullptr, false, scratchType, false, true); if (privateScratchSpaceSupported) { @@ -102,7 +97,7 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceS privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress(); } hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], - surfaceStateForPrivateScratchAllocation, maxHwThreadCount, + surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch, privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false, scratchType, false, true); }