Apply heuristics when setting TG dispatch size on XE_HPC_CORE

The default TG dispatch size can be changed to a better value based on the number of threads in the TG or the number of threads currently available on the GPU. Decisions on what the TG dispatch size should be are based on the implemented heuristics. Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com> Related-To: NEO-6989
2025-12-21 09:14:47 +08:00 · 2022-08-03 12:22:30 +00:00
parent 52133e61ce
commit ed0c36117e
21 changed files with 188 additions and 19 deletions
--- a/shared/source/kernel/kernel_descriptor.h
+++ b/shared/source/kernel/kernel_descriptor.h
@@ -11,6 +11,7 @@
 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/debug_helpers.h"
 #include "shared/source/kernel/debug_data.h"
+#include "shared/source/kernel/grf_config.h"
 #include "shared/source/kernel/kernel_arg_descriptor.h"
 #include "shared/source/kernel/kernel_arg_metadata.h"
 #include "shared/source/utilities/stackvec.h"
@@ -149,7 +150,7 @@ struct KernelDescriptor {
        uint16_t inlineDataPayloadSize = 0U;
        uint16_t perThreadDataSize = 0U;
        uint16_t numArgsToPatch = 0U;
-        uint16_t numGrfRequired = 0U;
+        uint16_t numGrfRequired = GrfConfig::DefaultGrfNumber;
        uint8_t barrierCount = 0u;
        bool hasNonKernelArgLoad = true;
        bool hasNonKernelArgStore = true;