diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index ee6814a3b6..89af2a1be4 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -221,7 +221,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.mediaSamplerRequired); - size_t requiredScratchSizeInBytes = requiredScratchSize * (hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount); + size_t requiredScratchSizeInBytes = requiredScratchSize * device->getDeviceInfo().computeUnitsUsedForScratch; auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations(); @@ -731,5 +731,4 @@ void CommandStreamReceiverHw::resetKmdNotifyHelper(KmdNotifyHelper *n template void CommandStreamReceiverHw::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) { } - } // namespace OCLRT diff --git a/runtime/device/device_caps.cpp b/runtime/device/device_caps.cpp index be1cd88d8d..7fbfa17495 100644 --- a/runtime/device/device_caps.cpp +++ b/runtime/device/device_caps.cpp @@ -275,20 +275,16 @@ void Device::initializeCaps() { deviceInfo.numThreadsPerEU = 0; auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8; - if (systemInfo.EUCount > 0) { - deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.pSkuTable->ftrPooledEuEnabled == 0) - ? (systemInfo.EUCount / systemInfo.SubSliceCount) - : systemInfo.EuCountPerPoolMin; - deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount; - auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u; - auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed; + deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.pSkuTable->ftrPooledEuEnabled == 0) + ? (systemInfo.EUCount / systemInfo.SubSliceCount) + : systemInfo.EuCountPerPoolMin; + deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount; + auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u; + auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed; + + maxWS = Math::prevPowerOfTwo(uint32_t(maxWS)); + deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize); - maxWS = Math::prevPowerOfTwo(uint32_t(maxWS)); - deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize); - } else { - //default value if systemInfo not provided - deviceInfo.maxWorkGroupSize = 128; - } DEBUG_BREAK_IF(!DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() && deviceInfo.maxWorkGroupSize > 256); // calculate a maximum number of subgroups in a workgroup (for the required SIMD size) @@ -310,9 +306,7 @@ void Device::initializeCaps() { systemInfo.MaxSlicesSupported, systemInfo.MaxSubSlicesSupported); - if (systemInfo.EUCount > 0) { - deviceInfo.computeUnitsUsedForScratch = systemInfo.MaxSubSlicesSupported * systemInfo.MaxEuPerSubSlice * systemInfo.ThreadCount / systemInfo.EUCount; - } + deviceInfo.computeUnitsUsedForScratch = hwHelper.getComputeUnitsUsedForScratch(&hwInfo); printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "computeUnitsUsedForScratch: %d\n", deviceInfo.computeUnitsUsedForScratch); diff --git a/runtime/helpers/hw_helper.h b/runtime/helpers/hw_helper.h index a0e9e0000e..90450aa207 100644 --- a/runtime/helpers/hw_helper.h +++ b/runtime/helpers/hw_helper.h @@ -39,6 +39,7 @@ class HwHelper { virtual uint32_t getBindingTableStateAlignement() const = 0; virtual size_t getInterfaceDescriptorDataSize() const = 0; virtual size_t getMaxBarrierRegisterPerSlice() const = 0; + virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0; virtual void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) = 0; virtual bool setupPreemptionRegisters(HardwareInfo *pHwInfo, bool enable) = 0; virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0; @@ -81,6 +82,8 @@ class HwHelperHw : public HwHelper { size_t getMaxBarrierRegisterPerSlice() const override; + uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override; + void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) override; bool setupPreemptionRegisters(HardwareInfo *pHwInfo, bool enable) override; diff --git a/runtime/helpers/hw_helper.inl b/runtime/helpers/hw_helper.inl index c1a10394c3..e16c4bfac8 100644 --- a/runtime/helpers/hw_helper.inl +++ b/runtime/helpers/hw_helper.inl @@ -39,6 +39,12 @@ void HwHelperHw::setupHardwareCapabilities(HardwareCapabilities *caps) { caps->image3DMaxWidth = 16384; } +template +uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const { + return pHwInfo->pSysInfo->MaxSubSlicesSupported * pHwInfo->pSysInfo->MaxEuPerSubSlice * + pHwInfo->pSysInfo->ThreadCount / pHwInfo->pSysInfo->EUCount; +} + template SipKernelType HwHelperHw::getSipKernelType(bool debuggingActive) { if (!debuggingActive) { diff --git a/unit_tests/device/device_caps_tests.cpp b/unit_tests/device/device_caps_tests.cpp index 41e92b72aa..77fcdbc775 100644 --- a/unit_tests/device/device_caps_tests.cpp +++ b/unit_tests/device/device_caps_tests.cpp @@ -134,12 +134,6 @@ TEST(Device_GetCaps, validate) { EXPECT_GE((4 * GB) - (8 * KB), caps.maxMemAllocSize); EXPECT_LE(65536u, caps.imageMaxBufferSize); - if (sysInfo.EUCount > 0) { - auto expected = sysInfo.MaxSubSlicesSupported * sysInfo.MaxEuPerSubSlice * - sysInfo.ThreadCount / sysInfo.EUCount; - EXPECT_EQ(expected, caps.computeUnitsUsedForScratch); - } - EXPECT_GT(caps.maxWorkGroupSize, 0u); EXPECT_EQ(caps.maxWorkItemSizes[0], caps.maxWorkGroupSize); EXPECT_EQ(caps.maxWorkItemSizes[1], caps.maxWorkGroupSize); @@ -218,20 +212,6 @@ TEST(Device_GetCaps, validateImage3DDimensions) { EXPECT_EQ(2048u, caps.image3DMaxDepth); } -TEST(DeviceGetCapsSimple, givenDeviceWhenEUCountIsZeroThenmaxWgsIsDefault) { - auto hardwareInfo = hardwareInfoTable[productFamily]; - GT_SYSTEM_INFO sysInfo = *hardwareInfo->pSysInfo; - sysInfo.EUCount = 0; - HardwareInfo hwInfo = {hardwareInfo->pPlatform, hardwareInfo->pSkuTable, hardwareInfo->pWaTable, &sysInfo, hardwareInfo->capabilityTable}; - - auto device = std::unique_ptr(DeviceHelper<>::create(&hwInfo)); - const auto &caps = device->getDeviceInfo(); - - //default value - uint32_t expected = 128u; - EXPECT_EQ(expected, caps.maxWorkGroupSize); -} - TEST(Device_GetCaps, givenDontForcePreemptionModeDebugVariableWhenCreateDeviceThenSetDefaultHwPreemptionMode) { DebugManagerStateRestore dbgRestorer; { diff --git a/unit_tests/gen8/test_device_caps_gen8.cpp b/unit_tests/gen8/test_device_caps_gen8.cpp index 935264d61c..6b0bba217c 100644 --- a/unit_tests/gen8/test_device_caps_gen8.cpp +++ b/unit_tests/gen8/test_device_caps_gen8.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "runtime/helpers/hw_helper.h" #include "unit_tests/fixtures/device_fixture.h" #include "test.h" @@ -80,6 +81,17 @@ BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) { EXPECT_EQ(80u, caps.outProfilingTimerResolution); } +BDWTEST_F(Gen8DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { + const auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily); + + uint32_t expectedValue = hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * + hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount; + + EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); + EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); +} + typedef Test BdwUsDeviceIdTest; BDWTEST_F(BdwUsDeviceIdTest, isSimulationCap) { diff --git a/unit_tests/gen9/test_device_caps_gen9.cpp b/unit_tests/gen9/test_device_caps_gen9.cpp index da966c4338..c516d0e94e 100644 --- a/unit_tests/gen9/test_device_caps_gen9.cpp +++ b/unit_tests/gen9/test_device_caps_gen9.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "runtime/helpers/hw_helper.h" #include "unit_tests/fixtures/device_fixture.h" #include "test.h" @@ -57,3 +58,14 @@ GEN9TEST_F(Gen9DeviceCaps, whitelistedRegisters) { GEN9TEST_F(Gen9DeviceCaps, compression) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrCompression); } + +GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { + const auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily); + + uint32_t expectedValue = hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * + hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount; + + EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); + EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); +}