From 5c5c718af373efaee4eb688804a186f7b429bcb1 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Fri, 21 Jul 2023 14:01:51 +0000 Subject: [PATCH] performance: detect indirect access in kernel, PVC Enabling on pvc after patch in igc. Enabling only for JIT kernels because AOT could have been compiled with IGC older than required. Related-To: NEO-7712 Signed-off-by: Dominik Dabek --- level_zero/core/source/kernel/kernel_imp.cpp | 4 ++-- level_zero/core/source/module/module_imp.cpp | 4 ++++ level_zero/core/source/module/module_imp.h | 5 ++++- opencl/source/kernel/kernel.cpp | 2 +- opencl/source/program/program.h | 3 +++ shared/source/os_interface/product_helper.h | 2 +- shared/source/os_interface/product_helper.inl | 2 +- shared/source/os_interface/product_helper_hw.h | 2 +- .../pvc/os_agnostic_product_helper_pvc.inl | 6 ++++-- .../unit_test/os_interface/product_helper_tests.cpp | 12 ++++++++---- .../xe_hpc_core/pvc/test_product_helper_pvc.cpp | 12 ++++++++---- 11 files changed, 37 insertions(+), 17 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 290aea723b..f0a125e90c 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1029,8 +1029,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(), kernelImmData->getResidencyContainer().end()); - - bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor); + ModuleImp *moduleImp = reinterpret_cast(this->module); + bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor, moduleImp->isPrecompiled()); if (NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) { detectIndirectAccessInKernel = NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() == 1; } diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index b5618a247a..91faa47cab 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -625,12 +625,14 @@ inline ze_result_t ModuleImp::initializeTranslationUnit(const ze_module_desc_t * } // If the user passed in only 1 SPIRV, then fallback to standard build if (inputSpirVs.size() > 1) { + this->precompiled = false; return this->translationUnit->staticLinkSpirV(inputSpirVs, inputModuleSizes, buildOptions.c_str(), internalBuildOptions.c_str(), specConstants); } else { + this->precompiled = false; return this->translationUnit->buildFromSpirV(reinterpret_cast(programExpDesc->pInputModules[0]), inputModuleSizes[0], buildOptions.c_str(), @@ -661,9 +663,11 @@ inline ze_result_t ModuleImp::initializeTranslationUnit(const ze_module_desc_t * // Assume Symbol Generation Given Prebuilt Binary this->isFunctionSymbolExportEnabled = true; this->isGlobalSymbolExportEnabled = true; + this->precompiled = true; return this->translationUnit->createFromNativeBinary(reinterpret_cast(desc->pInputModule), desc->inputSize); } else if (desc->format == ZE_MODULE_FORMAT_IL_SPIRV) { this->builtFromSPIRv = true; + this->precompiled = false; return this->translationUnit->buildFromSpirV(reinterpret_cast(desc->pInputModule), static_cast(desc->inputSize), buildOptions.c_str(), diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index 6c295ebd2e..704b60d0a3 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -86,7 +86,7 @@ struct ModuleTranslationUnit { NEO::specConstValuesMap specConstantsValues; bool isBuiltIn{false}; - bool isGeneratedByIgc = true; + bool isGeneratedByIgc{true}; }; struct ModuleImp : public Module { @@ -143,6 +143,8 @@ struct ModuleImp : public Module { bool isSPIRv() { return builtFromSPIRv; } + bool isPrecompiled() { return precompiled; } + bool shouldAllocatePrivateMemoryPerDispatch() const override { return allocatePrivateMemoryPerDispatch; } @@ -194,6 +196,7 @@ struct ModuleImp : public Module { bool isZebinBinary = false; bool isFunctionSymbolExportEnabled = false; bool isGlobalSymbolExportEnabled = false; + bool precompiled = false; ModuleType type; NEO::Linker::UnresolvedExternals unresolvedExternalsInfo{}; std::set importedSymbolAllocations{}; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 8500115b15..56d5d743bb 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -286,7 +286,7 @@ cl_int Kernel::initialize() { slmSizes.resize(numArgs); this->setInlineSamplers(); - bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor); + bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor, program->getCreatedFromBinary()); if (DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) { detectIndirectAccessInKernel = DebugManager.flags.DetectIndirectAccessInKernel.get() == 1; } diff --git a/opencl/source/program/program.h b/opencl/source/program/program.h index d53c846223..37e692b165 100644 --- a/opencl/source/program/program.h +++ b/opencl/source/program/program.h @@ -259,6 +259,9 @@ class Program : public BaseObject<_cl_program> { std::unique_lock lock{lockMutex}; return 0 != exposedKernels; } + bool getCreatedFromBinary() const { + return isCreatedFromBinary; + } const ExecutionEnvironment &getExecutionEnvironment() const { return executionEnvironment; } diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index a059c4a10b..0a8c4f39e7 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -166,7 +166,7 @@ class ProductHelper { virtual bool isBufferPoolAllocatorSupported() const = 0; virtual bool isTlbFlushRequired() const = 0; virtual bool isDummyBlitWaRequired() const = 0; - virtual bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const = 0; + virtual bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor, const bool isPrecompiled) const = 0; virtual bool isLinearStoragePreferred(bool isImage1d, bool forceLinearStorage) const = 0; virtual bool isTranslationExceptionSupported() const = 0; virtual uint32_t getMaxNumSamplers() const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index ecf3c033c5..8af9a7be33 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -69,7 +69,7 @@ bool ProductHelperHw::isTlbFlushRequired() const { } template -bool ProductHelperHw::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const { +bool ProductHelperHw::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor, const bool isPrecompiled) const { constexpr bool enabled = false; return enabled; } diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 2d7833d6dd..030a285489 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -118,7 +118,7 @@ class ProductHelperHw : public ProductHelper { bool isBufferPoolAllocatorSupported() const override; bool isTlbFlushRequired() const override; bool isDummyBlitWaRequired() const override; - bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const override; + bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor, const bool isPrecompiled) const override; bool isLinearStoragePreferred(bool isImage1d, bool forceLinearStorage) const override; bool isTranslationExceptionSupported() const override; uint32_t getMaxNumSamplers() const override; diff --git a/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl b/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl index 3bf3ac30f1..8c193e85e3 100644 --- a/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl +++ b/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl @@ -197,8 +197,10 @@ bool ProductHelperHw::isStatefulAddressingModeSupported() const { } template <> -bool ProductHelperHw::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const { - return false; +bool ProductHelperHw::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor, const bool isPrecompiled) const { + const bool isZebin = kernelDescriptor.kernelAttributes.binaryFormat == DeviceBinaryFormat::Zebin; + const bool isCMKernelHeuristic = kernelDescriptor.kernelAttributes.simdSize == 1; + return !isPrecompiled && isZebin && !isCMKernelHeuristic; } template <> diff --git a/shared/test/unit_test/os_interface/product_helper_tests.cpp b/shared/test/unit_test/os_interface/product_helper_tests.cpp index da6519b9d5..f681bd5e46 100644 --- a/shared/test/unit_test/os_interface/product_helper_tests.cpp +++ b/shared/test/unit_test/os_interface/product_helper_tests.cpp @@ -740,22 +740,26 @@ HWTEST_F(ProductHelperTest, givenProductHelperAndKernelBinaryFormatsWhenChecking { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; kernelDescriptor.kernelAttributes.simdSize = 8u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; kernelDescriptor.kernelAttributes.simdSize = 1u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; kernelDescriptor.kernelAttributes.simdSize = 1u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; kernelDescriptor.kernelAttributes.simdSize = 8u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } } diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp index 850dc21496..22b914d909 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp @@ -291,22 +291,26 @@ PVCTEST_F(PvcProductHelper, givenPvcProductHelperAndKernelBinaryFormatsWhenCheck { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; kernelDescriptor.kernelAttributes.simdSize = 8u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; kernelDescriptor.kernelAttributes.simdSize = 1u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; kernelDescriptor.kernelAttributes.simdSize = 1u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } { kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; kernelDescriptor.kernelAttributes.simdSize = 8u; - EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + EXPECT_TRUE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, false)); + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor, true)); } }