Improve scratch allocation size calculation
Change-Id: I627bea89ce31e7110976cb88f9e9266e08af590a
This commit is contained in:
parent
e6a9d30951
commit
bd16f4bf2b
|
@ -221,7 +221,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
|
||||
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
|
||||
|
||||
size_t requiredScratchSizeInBytes = requiredScratchSize * (hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount);
|
||||
size_t requiredScratchSizeInBytes = requiredScratchSize * device->getDeviceInfo().computeUnitsUsedForScratch;
|
||||
|
||||
auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations();
|
||||
|
||||
|
@ -731,5 +731,4 @@ void CommandStreamReceiverHw<GfxFamily>::resetKmdNotifyHelper(KmdNotifyHelper *n
|
|||
// Generic definition of the addClearSLMWorkAround hook: the body is empty, so
// the PIPE_CONTROL command passed in through pCmd is neither read nor modified.
// NOTE(review): presumably hardware families that need the workaround provide
// their own definition elsewhere — confirm.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) {
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -275,20 +275,16 @@ void Device::initializeCaps() {
|
|||
deviceInfo.numThreadsPerEU = 0;
|
||||
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8;
|
||||
|
||||
if (systemInfo.EUCount > 0) {
|
||||
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.pSkuTable->ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / systemInfo.SubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
|
||||
auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u;
|
||||
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
|
||||
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.pSkuTable->ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / systemInfo.SubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
|
||||
auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u;
|
||||
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
|
||||
|
||||
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
|
||||
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize);
|
||||
|
||||
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
|
||||
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize);
|
||||
} else {
|
||||
//default value if systemInfo not provided
|
||||
deviceInfo.maxWorkGroupSize = 128;
|
||||
}
|
||||
DEBUG_BREAK_IF(!DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() && deviceInfo.maxWorkGroupSize > 256);
|
||||
|
||||
// calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
|
||||
|
@ -310,9 +306,7 @@ void Device::initializeCaps() {
|
|||
systemInfo.MaxSlicesSupported,
|
||||
systemInfo.MaxSubSlicesSupported);
|
||||
|
||||
if (systemInfo.EUCount > 0) {
|
||||
deviceInfo.computeUnitsUsedForScratch = systemInfo.MaxSubSlicesSupported * systemInfo.MaxEuPerSubSlice * systemInfo.ThreadCount / systemInfo.EUCount;
|
||||
}
|
||||
deviceInfo.computeUnitsUsedForScratch = hwHelper.getComputeUnitsUsedForScratch(&hwInfo);
|
||||
|
||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "computeUnitsUsedForScratch: %d\n", deviceInfo.computeUnitsUsedForScratch);
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ class HwHelper {
|
|||
virtual uint32_t getBindingTableStateAlignement() const = 0;
|
||||
virtual size_t getInterfaceDescriptorDataSize() const = 0;
|
||||
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
|
||||
// Returns the number of compute units to account for when sizing scratch space
// for the hardware described by pHwInfo. Device::initializeCaps stores the result
// in deviceInfo.computeUnitsUsedForScratch, and the command stream receiver
// multiplies the required scratch size by that value.
virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0;
|
||||
virtual void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) = 0;
|
||||
virtual bool setupPreemptionRegisters(HardwareInfo *pHwInfo, bool enable) = 0;
|
||||
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
|
||||
|
@ -81,6 +82,8 @@ class HwHelperHw : public HwHelper {
|
|||
|
||||
size_t getMaxBarrierRegisterPerSlice() const override;
|
||||
|
||||
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
|
||||
|
||||
void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) override;
|
||||
|
||||
bool setupPreemptionRegisters(HardwareInfo *pHwInfo, bool enable) override;
|
||||
|
|
|
@ -39,6 +39,12 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
|
|||
caps->image3DMaxWidth = 16384;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
|
||||
return pHwInfo->pSysInfo->MaxSubSlicesSupported * pHwInfo->pSysInfo->MaxEuPerSubSlice *
|
||||
pHwInfo->pSysInfo->ThreadCount / pHwInfo->pSysInfo->EUCount;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
|
||||
if (!debuggingActive) {
|
||||
|
|
|
@ -134,12 +134,6 @@ TEST(Device_GetCaps, validate) {
|
|||
EXPECT_GE((4 * GB) - (8 * KB), caps.maxMemAllocSize);
|
||||
EXPECT_LE(65536u, caps.imageMaxBufferSize);
|
||||
|
||||
if (sysInfo.EUCount > 0) {
|
||||
auto expected = sysInfo.MaxSubSlicesSupported * sysInfo.MaxEuPerSubSlice *
|
||||
sysInfo.ThreadCount / sysInfo.EUCount;
|
||||
EXPECT_EQ(expected, caps.computeUnitsUsedForScratch);
|
||||
}
|
||||
|
||||
EXPECT_GT(caps.maxWorkGroupSize, 0u);
|
||||
EXPECT_EQ(caps.maxWorkItemSizes[0], caps.maxWorkGroupSize);
|
||||
EXPECT_EQ(caps.maxWorkItemSizes[1], caps.maxWorkGroupSize);
|
||||
|
@ -218,20 +212,6 @@ TEST(Device_GetCaps, validateImage3DDimensions) {
|
|||
EXPECT_EQ(2048u, caps.image3DMaxDepth);
|
||||
}
|
||||
|
||||
// Creates a device from hardware info whose system info reports EUCount == 0
// and checks that maxWorkGroupSize equals the default value of 128.
TEST(DeviceGetCapsSimple, givenDeviceWhenEUCountIsZeroThenmaxWgsIsDefault) {
    auto referenceHwInfo = hardwareInfoTable[productFamily];

    // Modify a local copy of the system info rather than the table entry itself.
    GT_SYSTEM_INFO sysInfo = *referenceHwInfo->pSysInfo;
    sysInfo.EUCount = 0;

    HardwareInfo hwInfo = {referenceHwInfo->pPlatform,
                           referenceHwInfo->pSkuTable,
                           referenceHwInfo->pWaTable,
                           &sysInfo,
                           referenceHwInfo->capabilityTable};

    std::unique_ptr<Device> device(DeviceHelper<>::create(&hwInfo));
    const auto &caps = device->getDeviceInfo();

    // default value
    const uint32_t expected = 128u;
    EXPECT_EQ(expected, caps.maxWorkGroupSize);
}
|
||||
|
||||
TEST(Device_GetCaps, givenDontForcePreemptionModeDebugVariableWhenCreateDeviceThenSetDefaultHwPreemptionMode) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
{
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "test.h"
|
||||
|
||||
|
@ -80,6 +81,17 @@ BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) {
|
|||
EXPECT_EQ(80u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
// Verifies that both the HwHelper selected for the device's render core family
// and the device caps report
//   MaxSubSlicesSupported * MaxEuPerSubSlice * ThreadCount / EUCount
// as the number of compute units used for scratch.
BDWTEST_F(Gen8DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) {
    const auto &hwInfo = pDevice->getHardwareInfo();
    const auto &sysInfo = *hwInfo.pSysInfo;

    const uint32_t expectedValue = sysInfo.MaxSubSlicesSupported * sysInfo.MaxEuPerSubSlice *
                                   sysInfo.ThreadCount / sysInfo.EUCount;

    auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily);

    EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo));
    EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch);
}
|
||||
|
||||
typedef Test<DeviceFixture> BdwUsDeviceIdTest;
|
||||
|
||||
BDWTEST_F(BdwUsDeviceIdTest, isSimulationCap) {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
|
||||
#include "test.h"
|
||||
|
@ -57,3 +58,14 @@ GEN9TEST_F(Gen9DeviceCaps, whitelistedRegisters) {
|
|||
// Checks that the device's hardware info reports ftrCompression as false in
// its capability table.
GEN9TEST_F(Gen9DeviceCaps, compression) {
|
||||
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrCompression);
|
||||
}
|
||||
|
||||
// Verifies that both the HwHelper selected for the device's render core family
// and the device caps report
//   MaxSubSlicesSupported * MaxEuPerSubSlice * ThreadCount / EUCount
// as the number of compute units used for scratch.
GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) {
    const auto &hwInfo = pDevice->getHardwareInfo();
    const auto &sysInfo = *hwInfo.pSysInfo;

    const uint32_t expectedValue = sysInfo.MaxSubSlicesSupported * sysInfo.MaxEuPerSubSlice *
                                   sysInfo.ThreadCount / sysInfo.EUCount;

    auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily);

    EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo));
    EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch);
}
|
||||
|
|
Loading…
Reference in New Issue