mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Improve calculateAvailableThreadCount implementation
Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
fe0c857f1a
commit
af91f94098
@@ -89,8 +89,7 @@ class HwHelper {
|
||||
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool isLinearStoragePreferred(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
||||
virtual uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
||||
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) = 0;
|
||||
|
||||
@@ -292,7 +291,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const override;
|
||||
|
||||
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
|
||||
uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) override;
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) override;
|
||||
|
||||
|
||||
@@ -93,9 +93,8 @@ uint32_t HwHelperHw<GfxFamily>::getMocsIndex(const GmmHelper &gmmHelper, bool l3
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
return threadsPerEu * euCount;
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -72,4 +72,10 @@ size_t HwHelperHw<Family>::getPaddingForISAAllocation() const {
|
||||
return 0xE00;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) {
|
||||
auto maxThreadsPerEuCount = 1024u / grfCount;
|
||||
return maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -129,12 +129,11 @@ uint32_t HwHelperHw<GfxFamily>::getMocsIndex(const GmmHelper &gmmHelper, bool l3
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) {
|
||||
if (grfCount > GrfConfig::DefaultGrfNumber) {
|
||||
return threadsPerEu / 2u * euCount;
|
||||
return hwInfo.gtSystemInfo.ThreadCount / 2u;
|
||||
}
|
||||
return threadsPerEu * euCount;
|
||||
return hwInfo.gtSystemInfo.ThreadCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
Reference in New Issue
Block a user