diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 27ed054d62..8239eab8f4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -678,12 +678,13 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotD auto kernelSshSize = alignUp(kernel->getSurfaceStateHeapDataSize(), FamilyType::cacheLineSize); auto ssh = commandList->getCmdContainer().getIndirectHeap(NEO::HeapType::surfaceState); - auto sshHeapSize = ssh->getMaxAvailableSpace(); + auto sshHeapSize = ssh->getAvailableSpace(); + ssh->getSpace(sshHeapSize - (kernelSshSize / 2)); auto initialAllocation = ssh->getGraphicsAllocation(); EXPECT_NE(nullptr, initialAllocation); const_cast(kernel->getKernelDescriptor().kernelAttributes.bufferAddressingMode) = KernelDescriptor::BindfulAndStateless; CmdListKernelLaunchParams launchParams = {}; - for (size_t i = 0; i < sshHeapSize / kernelSshSize + 1; i++) { + for (size_t i = 0; i < 2; i++) { auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 4860935482..68470c30f5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -312,6 +312,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelWithPrintfBufferCrea kernel->privateState.pImplicitArgs->v0.header.structSize = ImplicitArgsV0::getSize(); UnitTestHelper::adjustKernelDescriptorForImplicitArgs(*kernel->immutableData.kernelDescriptor); kernel->createPrintfBuffer(); + kernel->setGroupSize(8, 1, 1); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; @@ -344,6 +345,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenApp kernel->setModule(&module); kernel->descriptor.kernelAttributes.flags.usesPrintf = false; + kernel->setGroupSize(8, 1, 1); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; @@ -372,6 +374,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen kernel->setModule(&module); kernel->descriptor.kernelAttributes.flags.usesPrintf = true; kernel->createPrintfBuffer(); + kernel->setGroupSize(8, 1, 1); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; @@ -411,6 +414,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelAndPrintfBufferForSt kernel->privateState.pImplicitArgs->v0.header.structSize = ImplicitArgsV0::getSize(); UnitTestHelper::adjustKernelDescriptorForImplicitArgs(*kernel->immutableData.kernelDescriptor); kernel->createPrintfBuffer(); + kernel->setGroupSize(8, 1, 1); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; @@ -1256,6 +1260,7 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit kernel.privateState.pImplicitArgs.reset(new ImplicitArgs()); kernel.privateState.pImplicitArgs->v0.header.structVersion = 0; kernel.privateState.pImplicitArgs->v0.header.structSize = ImplicitArgsV0::getSize(); + kernel.privateState.pImplicitArgs->setLocalSize(1, 1, 1); UnitTestHelper::adjustKernelDescriptorForImplicitArgs(*kernel.immutableData.kernelDescriptor); kernel.setGroupSize(1, 1, 1); diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index ecc1e77fb5..a271748ddf 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -81,7 +81,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( localWorkSize[0] = pImplicitArgs->v1.localSizeX; localWorkSize[1] = pImplicitArgs->v1.localSizeY; localWorkSize[2] = pImplicitArgs->v1.localSizeZ; - } else if (pImplicitArgs->v1.header.structVersion == 2) { + } else if (pImplicitArgs->v2.header.structVersion == 2) { localWorkSize[0] = pImplicitArgs->v2.localSizeX; localWorkSize[1] = pImplicitArgs->v2.localSizeY; localWorkSize[2] = pImplicitArgs->v2.localSizeZ; diff --git a/opencl/test/unit_test/api/cl_build_program_tests.inl b/opencl/test/unit_test/api/cl_build_program_tests.inl index a9ec781178..a9e22a4e93 100644 --- a/opencl/test/unit_test/api/cl_build_program_tests.inl +++ b/opencl/test/unit_test/api/cl_build_program_tests.inl @@ -597,6 +597,104 @@ TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltFo EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(ClBuildProgramMultiDeviceTests, GivenProgramCreatedFromSourceWhenBuildingThenCorrectlyFilledSingleDeviceBinaryIsUsed) { + MockUnrestrictiveContextMultiGPU context; + cl_program pProgram = nullptr; + + std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===(' +kernels: + - name : some_kernel + execution_env : + simd_size : 32 + require_iab: true + - name : some_other_kernel + execution_env : + simd_size : 32 +)==="; + + uint8_t kernelIsa[8]{0U}; + ZebinTestData::ValidEmptyProgram zebin; + zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo); + zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef::fromAny(zeinfo.data(), zeinfo.size())); + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)}); + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)}); + + const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'}; + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data); + + { + const uint32_t indirectAccessBufferMajorVersion = 4u; + + Zebin::Elf::ElfNoteSection elfNoteSection = {}; + elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion; + elfNoteSection.descSize = sizeof(uint32_t); + elfNoteSection.nameSize = 8u; + + auto sectionDataSize = sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + alignUp(elfNoteSection.descSize, 4); + auto noteIntelGTSectionData = std::make_unique(sectionDataSize); + auto appendSingleIntelGTSectionData = [](const NEO::Elf::ElfNoteSection &elfNoteSection, uint8_t *const intelGTSectionData, const uint8_t *descData, const char *ownerName, size_t spaceAvailable) { + size_t offset = 0; + ASSERT_GE(spaceAvailable, sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize); + memcpy_s(ptrOffset(intelGTSectionData, offset), sizeof(NEO::Elf::ElfNoteSection), &elfNoteSection, sizeof(NEO::Elf::ElfNoteSection)); + offset += sizeof(NEO::Elf::ElfNoteSection); + memcpy_s(reinterpret_cast(ptrOffset(intelGTSectionData, offset)), elfNoteSection.nameSize, ownerName, elfNoteSection.nameSize); + offset += elfNoteSection.nameSize; + memcpy_s(ptrOffset(intelGTSectionData, offset), elfNoteSection.descSize, descData, elfNoteSection.descSize); + offset += elfNoteSection.descSize; + }; + + appendSingleIntelGTSectionData(elfNoteSection, noteIntelGTSectionData.get(), reinterpret_cast(&indirectAccessBufferMajorVersion), + Zebin::Elf::intelGTNoteOwnerName.str().c_str(), sectionDataSize); + zebin.appendSection(Zebin::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef::fromAny(noteIntelGTSectionData.get(), sectionDataSize)); + } + + MockCompilerDebugVars debugVars; + debugVars.binaryToReturn = const_cast(zebin.storage.data()); + debugVars.binaryToReturnSize = zebin.storage.size(); + gEnvironment->igcPushDebugVars(debugVars); + gEnvironment->fclPushDebugVars(debugVars); + + cl_int retVal = CL_INVALID_PROGRAM; + pProgram = clCreateProgramWithSource( + &context, + 1, + sources, + &sourceKernelSize, + &retVal); + + EXPECT_NE(nullptr, pProgram); + ASSERT_EQ(CL_SUCCESS, retVal); + + cl_device_id firstDevice = context.pRootDevice0; + cl_device_id secondDevice = context.pRootDevice1; + cl_device_id devices[] = {firstDevice, secondDevice}; + + retVal = clBuildProgram( + pProgram, + 2, + devices, + nullptr, + nullptr, + nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + cl_kernel pKernel = clCreateKernel(pProgram, "some_kernel", &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + + MultiDeviceKernel *kernel = castToObject(pKernel); + Program *program = castToObject(pProgram); + EXPECT_EQ(4u, program->getIndirectAccessBufferVersion()); + EXPECT_FALSE(kernel->getKernelInfos()[1]->kernelDescriptor.kernelMetadata.isGeneratedByIgc); + + retVal = clReleaseKernel(pKernel); + EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clReleaseProgram(pProgram); + EXPECT_EQ(CL_SUCCESS, retVal); + + gEnvironment->igcPopDebugVars(); + gEnvironment->fclPopDebugVars(); +} + TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 6da66fed81..9c87f018b1 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -3934,6 +3934,7 @@ HWTEST_F(KernelImplicitArgsTest, givenGfxCoreRequiringImplicitArgsV1WhenSettingK ASSERT_NE(nullptr, pImplicitArgs); ImplicitArgsV1 expectedImplicitArgs = {{ImplicitArgsV1::getSize(), 1}}; + expectedImplicitArgs.simdWidth = kernel.getDescriptor().kernelAttributes.simdSize; expectedImplicitArgs.numWorkDim = 3; expectedImplicitArgs.localSizeX = 4; expectedImplicitArgs.localSizeY = 5; diff --git a/shared/source/compiler_interface/linker.cpp b/shared/source/compiler_interface/linker.cpp index b4b49852d2..ed3787eb7c 100644 --- a/shared/source/compiler_interface/linker.cpp +++ b/shared/source/compiler_interface/linker.cpp @@ -681,11 +681,16 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs; if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { uint64_t implicitArgsSize = 0; - if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) { + uint8_t version = kernelDescriptor.kernelMetadata.indirectAccessBuffer; + if (version == 0) { + version = pDevice->getGfxCoreHelper().getImplicitArgsVersion(); + } + + if (version == 0) { implicitArgsSize = ImplicitArgsV0::getAlignedSize(); - } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 1) { + } else if (version == 1) { implicitArgsSize = ImplicitArgsV1::getAlignedSize(); - } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 2) { + } else if (version == 2) { implicitArgsSize = ImplicitArgsV2::getAlignedSize(); } else { UNRECOVERABLE_IF(true); diff --git a/shared/source/device_binary_format/device_binary_format_zebin.cpp b/shared/source/device_binary_format/device_binary_format_zebin.cpp index 59cfc1652f..5e50ae2b5e 100644 --- a/shared/source/device_binary_format/device_binary_format_zebin.cpp +++ b/shared/source/device_binary_format/device_binary_format_zebin.cpp @@ -59,8 +59,9 @@ SingleDeviceBinary unpackSingleZebin(const ArrayRef archive, cons bool validForTarget = true; if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) { - validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret); + validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret.generatorFeatureVersions, ret.generator); } else { + Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret.generatorFeatureVersions, ret.generator); const auto &flags = reinterpret_cast(elf.elfFileHeader->flags); validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily ? (requestedTargetDevice.coreFamily == static_cast(elf.elfFileHeader->machine)) @@ -114,10 +115,16 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s return DecodeError::invalidBinary; } + GeneratorFeatureVersions generatorFeatures = {}; + GeneratorType generator = {}; + auto ret = Zebin::validateTargetDevice(elf, src.targetDevice, outErrReason, outWarning, generatorFeatures, generator); + if (!ret && elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) { + return DecodeError::invalidBinary; + } dst.grfSize = src.targetDevice.grfSize; dst.minScratchSpaceSize = src.targetDevice.minScratchSpaceSize; - dst.indirectDetectionVersion = src.generatorFeatureVersions.indirectMemoryAccessDetection; - dst.indirectAccessBufferMajorVersion = src.generatorFeatureVersions.indirectAccessBuffer; + dst.indirectDetectionVersion = generatorFeatures.indirectMemoryAccessDetection; + dst.indirectAccessBufferMajorVersion = generatorFeatures.indirectAccessBuffer; dst.samplerStateSize = src.targetDevice.samplerStateSize; dst.samplerBorderColorStateSize = src.targetDevice.samplerBorderColorStateSize; @@ -126,10 +133,11 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s return decodeError; } - const bool isGeneratedByIgc = src.generator == GeneratorType::igc; + const bool isGeneratedByIgc = generator == GeneratorType::igc; for (auto &kernelInfo : dst.kernelInfos) { kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc; + kernelInfo->kernelDescriptor.kernelMetadata.indirectAccessBuffer = generatorFeatures.indirectAccessBuffer; if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState(); diff --git a/shared/source/device_binary_format/device_binary_formats.h b/shared/source/device_binary_format/device_binary_formats.h index 9146c6beed..7ee4f4aac8 100644 --- a/shared/source/device_binary_format/device_binary_formats.h +++ b/shared/source/device_binary_format/device_binary_formats.h @@ -75,6 +75,12 @@ struct TargetDevice { }; TargetDevice getTargetDevice(const RootDeviceEnvironment &rootDeviceEnvironment); +struct GeneratorFeatureVersions { + using VersionT = uint32_t; + VersionT indirectMemoryAccessDetection = 0u; + VersionT indirectAccessBuffer = 0u; +}; + struct SingleDeviceBinary { DeviceBinaryFormat format = DeviceBinaryFormat::unknown; ArrayRef deviceBinary; @@ -84,11 +90,7 @@ struct SingleDeviceBinary { ConstStringRef buildOptions; TargetDevice targetDevice; GeneratorType generator = GeneratorType::igc; - struct GeneratorFeatureVersions { - using VersionT = uint32_t; - VersionT indirectMemoryAccessDetection = 0u; - VersionT indirectAccessBuffer = 0u; - } generatorFeatureVersions; + GeneratorFeatureVersions generatorFeatureVersions; }; template diff --git a/shared/source/device_binary_format/zebin/zebin_decoder.cpp b/shared/source/device_binary_format/zebin/zebin_decoder.cpp index 3eac29c655..1bd9679f32 100644 --- a/shared/source/device_binary_format/zebin/zebin_decoder.cpp +++ b/shared/source/device_binary_format/zebin/zebin_decoder.cpp @@ -92,10 +92,10 @@ bool validateTargetDevice(const TargetDevice &targetDevice, Elf::ElfIdentifierCl return true; } -template bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary); -template bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary); +template bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator); +template bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator); template -bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary) { +bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator) { GFXCORE_FAMILY gfxCore = IGFX_UNKNOWN_CORE; PRODUCT_FAMILY productFamily = IGFX_UNKNOWN; AOT::PRODUCT_CONFIG productConfig = AOT::UNKNOWN_ISA; @@ -123,7 +123,7 @@ bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targ DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size()); auto targetMetadataPacked = reinterpret_cast(intelGTNote.data.begin()); targetMetadata.packed = static_cast(*targetMetadataPacked); - singleDeviceBinary.generator = static_cast(targetMetadata.generatorId); + generator = static_cast(targetMetadata.generatorId); break; } case Elf::IntelGTSectionType::zebinVersion: { @@ -155,13 +155,13 @@ bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targ case Elf::IntelGTSectionType::indirectAccessDetectionVersion: { DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size()); auto indirectDetectionVersion = reinterpret_cast(intelGTNote.data.begin()); - singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection = static_cast(*indirectDetectionVersion); + generatorFeatures.indirectMemoryAccessDetection = static_cast(*indirectDetectionVersion); break; } case Elf::IntelGTSectionType::indirectAccessBufferMajorVersion: { DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size()); auto indirectDetectionVersion = reinterpret_cast(intelGTNote.data.begin()); - singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer = static_cast(*indirectDetectionVersion); + generatorFeatures.indirectAccessBuffer = static_cast(*indirectDetectionVersion); break; } default: diff --git a/shared/source/device_binary_format/zebin/zebin_decoder.h b/shared/source/device_binary_format/zebin/zebin_decoder.h index 791d2d2cd6..9be81e3b36 100644 --- a/shared/source/device_binary_format/zebin/zebin_decoder.h +++ b/shared/source/device_binary_format/zebin/zebin_decoder.h @@ -52,7 +52,7 @@ bool isZebin(ArrayRef binary); bool validateTargetDevice(const TargetDevice &targetDevice, Elf::ElfIdentifierClass numBits, PRODUCT_FAMILY productFamily, GFXCORE_FAMILY gfxCore, AOT::PRODUCT_CONFIG productConfig, Zebin::Elf::ZebinTargetFlags targetMetadata); template -bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary); +bool validateTargetDevice(const Elf::Elf &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator); template DecodeError decodeIntelGTNoteSection(ArrayRef intelGTNotesSection, std::vector &intelGTNotes, std::string &outErrReason, std::string &outWarning); diff --git a/shared/source/kernel/definitions/implicit_args.h b/shared/source/kernel/definitions/implicit_args.h index 8f8fe232a8..7481e614f7 100644 --- a/shared/source/kernel/definitions/implicit_args.h +++ b/shared/source/kernel/definitions/implicit_args.h @@ -56,7 +56,7 @@ static_assert(ImplicitArgsV0::getSize() == (28 * sizeof(uint32_t))); struct alignas(32) ImplicitArgsV1 { ImplicitArgsHeader header; uint8_t numWorkDim; - uint8_t padding0; + uint8_t simdWidth; uint32_t localSizeX; uint32_t localSizeY; uint32_t localSizeZ; @@ -71,7 +71,7 @@ struct alignas(32) ImplicitArgsV1 { uint32_t groupCountX; uint32_t groupCountY; uint32_t groupCountZ; - uint32_t padding1; + uint32_t padding0; uint64_t rtGlobalBufferPtr; uint64_t assertBufferPtr; uint64_t scratchPtr; @@ -183,12 +183,16 @@ struct alignas(32) ImplicitArgs { void setSimdWidth(uint32_t simd) { if (v0.header.structVersion == 0) { v0.simdWidth = simd; + } else if (v1.header.structVersion == 1) { + v1.simdWidth = simd; } } std::optional getSimdWidth() const { if (v0.header.structVersion == 0) { return v0.simdWidth; + } else if (v1.header.structVersion == 1) { + return v1.simdWidth; } return std::nullopt; } @@ -328,6 +332,12 @@ struct alignas(32) ImplicitArgs { } } + void setScratchBufferPtr(uint64_t scratchBuffer) { + if (v1.header.structVersion == 1) { + v1.scratchPtr = scratchBuffer; + } + } + void setEnqueuedLocalSize(uint32_t x, uint32_t y, uint32_t z) { if (v1.header.structVersion == 1) { v1.enqueuedLocalSizeX = x; diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp index b99245703b..2c91017183 100644 --- a/shared/source/kernel/implicit_args_helper.cpp +++ b/shared/source/kernel/implicit_args_helper.cpp @@ -59,7 +59,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); uint32_t localIdsSize = 0; if (false == patchImplicitArgsBufferInCrossThread) { - auto simdSize = 32u; + auto simdSize = kernelDescriptor.kernelAttributes.simdSize; auto grfSize = NEO::ImplicitArgsHelper::getGrfSize(simdSize); auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired; @@ -91,7 +91,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons uint32_t lws[3] = {0, 0, 0}; implicitArgs.getLocalSize(lws[0], lws[1], lws[2]); - auto simdSize = implicitArgs.getSimdWidth().value_or(32); + auto simdSize = kernelDescriptor.kernelAttributes.simdSize; auto grfSize = getGrfSize(simdSize); auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired; auto dimensionOrder = getDimensionOrderForLocalIds(kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, hwGenerationOfLocalIdsParams); diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 6bd100ee9a..894531c463 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -268,6 +268,7 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass { uint16_t compiledSubGroupsNumber = 0U; uint8_t requiredSubGroupSize = 0U; uint8_t requiredThreadGroupDispatchSize = 0U; + uint8_t indirectAccessBuffer = 0u; bool isGeneratedByIgc = true; } kernelMetadata; diff --git a/shared/test/unit_test/compiler_interface/linker_tests.cpp b/shared/test/unit_test/compiler_interface/linker_tests.cpp index a47efbcf17..9e9a76247e 100644 --- a/shared/test/unit_test/compiler_interface/linker_tests.cpp +++ b/shared/test/unit_test/compiler_interface/linker_tests.cpp @@ -2313,6 +2313,82 @@ TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsOrRequiredImplicitArg EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); } +HWTEST_F(LinkerTests, givenImplicitArgRelocationAndKernelDescriptorWithImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) { + DebugManagerStateRestore restore; + struct MockGfxCoreHelper : NEO::GfxCoreHelperHw { + uint32_t getImplicitArgsVersion() const override { + return 0; + } + }; + + NEO::LinkerInput linkerInput; + + vISA::GenRelocEntry reloc = {}; + std::string relocationName{implicitArgsRelocationSymbolName}; + memcpy_s(reloc.r_symbol, 1024, relocationName.c_str(), relocationName.size()); + reloc.r_offset = 8; + reloc.r_type = vISA::GenRelocType::R_SYM_ADDR_32; + + vISA::GenRelocEntry reloc64 = {}; + memcpy_s(reloc64.r_symbol, 1024, relocationName.c_str(), relocationName.size()); + reloc64.r_offset = 16; + reloc64.r_type = vISA::GenRelocType::R_SYM_ADDR; + + vISA::GenRelocEntry relocs[] = {reloc, reloc64}; + constexpr uint32_t numRelocations = 2; + bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0); + EXPECT_TRUE(decodeRelocSuccess); + + NEO::Linker linker(linkerInput); + NEO::Linker::SegmentInfo globalVarSegment, globalConstSegment, exportedFuncSegment; + globalVarSegment.gpuAddress = 8; + globalVarSegment.segmentSize = 64; + globalConstSegment.gpuAddress = 128; + globalConstSegment.segmentSize = 256; + exportedFuncSegment.gpuAddress = 4096; + exportedFuncSegment.segmentSize = 1024; + NEO::Linker::UnresolvedExternals unresolvedExternals; + NEO::Linker::KernelDescriptorsT kernelDescriptors; + NEO::Linker::ExternalFunctionsT externalFunctions; + KernelDescriptor kernelDescriptor; + kernelDescriptors.push_back(&kernelDescriptor); + kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true; + kernelDescriptor.kernelAttributes.flags.useStackCalls = true; + kernelDescriptor.kernelMetadata.indirectAccessBuffer = 1; + + HardwareInfo hwInfo = *defaultHwInfo; + MockExecutionEnvironment executionEnvironment(&hwInfo, false, 1); + executionEnvironment.incRefInternal(); + + UltDeviceFactory deviceFactory{1, 0, executionEnvironment}; + auto rootDeviceIndex = deviceFactory.rootDevices[0]->getRootDeviceIndex(); + RAIIGfxCoreHelperFactory raii(*deviceFactory.rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]); + + std::vector instructionSegment; + uint32_t initData = 0x77777777; + instructionSegment.resize(32, static_cast(initData)); + NEO::Linker::PatchableSegment seg0; + seg0.hostPointer = instructionSegment.data(); + seg0.segmentSize = instructionSegment.size(); + NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; + + auto linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {}, + nullptr, nullptr, patchableInstructionSegments, unresolvedExternals, + deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions); + EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult); + + auto addressToPatch = reinterpret_cast(instructionSegment.data() + reloc.r_offset); + + EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), *addressToPatch); + EXPECT_EQ(initData, *(addressToPatch - 1)); + EXPECT_EQ(initData, *(addressToPatch + 1)); + + auto addressToPatch64 = (instructionSegment.data() + reloc64.r_offset); + uint64_t patchedValue64 = 0; + memcpy_s(&patchedValue64, sizeof(patchedValue64), addressToPatch64, sizeof(patchedValue64)); + EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), patchedValue64); +} + HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) { DebugManagerStateRestore restore; struct MockGfxCoreHelper : NEO::GfxCoreHelperHw { diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index 21b7530833..24776d77d4 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -3287,46 +3287,6 @@ TEST(DecodeSingleDeviceBinaryZebin, GivenConstDataStringsSectionThenSetsUpInitDa EXPECT_EQ(0, memcmp(programInfo.globalStrings.initData, data, sizeof(data))); } -TEST(DecodeSingleDeviceBinaryZebin, GivenIabVersion2NoPrintfAttributeInZeInfoAndConstDataStringsSectionWhenDecodingBinaryThenKernelsWithIabRequiredHasFlagUsesPrintfSetToTrue) { - NEO::MockExecutionEnvironment mockExecutionEnvironment{}; - auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); - std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===(' -kernels: - - name : some_kernel - execution_env : - simd_size : 8 - require_iab: true - - name : some_other_kernel - execution_env : - simd_size : 32 -)==="; - - uint8_t kernelIsa[8]{0U}; - ZebinTestData::ValidEmptyProgram zebin; - zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo); - zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef::fromAny(zeinfo.data(), zeinfo.size())); - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)}); - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)}); - - const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'}; - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data); - - NEO::ProgramInfo programInfo; - NEO::SingleDeviceBinary singleBinary; - singleBinary.deviceBinary = zebin.storage; - singleBinary.generatorFeatureVersions.indirectAccessBuffer = 2; - std::string errors; - std::string warnings; - auto error = NEO::decodeSingleDeviceBinary(programInfo, singleBinary, errors, warnings, gfxCoreHelper); - EXPECT_EQ(NEO::DecodeError::success, error); - EXPECT_TRUE(warnings.empty()) << warnings; - EXPECT_TRUE(errors.empty()) << errors; - - ASSERT_EQ(2u, programInfo.kernelInfos.size()); - EXPECT_TRUE(programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.flags.usesPrintf); - EXPECT_FALSE(programInfo.kernelInfos[1]->kernelDescriptor.kernelAttributes.flags.usesPrintf); -} - TEST(DecodeSingleDeviceBinaryZebin, GivenIntelGTNoteSectionThenAddsItToZebinSections) { ZebinTestData::ValidEmptyProgram zebin; @@ -6148,6 +6108,71 @@ class IntelGTNotesFixture : public ::testing::Test { ZebinTestData::ValidEmptyProgram<> zebin; }; +TEST_F(IntelGTNotesFixture, GivenIabVersion2NoPrintfAttributeInZeInfoAndConstDataStringsSectionWhenDecodingBinaryThenKernelsWithIabRequiredHasFlagUsesPrintfSetToTrue) { + NEO::MockExecutionEnvironment mockExecutionEnvironment{}; + auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); + std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===(' +kernels: + - name : some_kernel + execution_env : + simd_size : 8 + require_iab: true + - name : some_other_kernel + execution_env : + simd_size : 32 +)==="; + + uint8_t kernelIsa[8]{0U}; + ZebinTestData::ValidEmptyProgram zebin; + zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo); + zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef::fromAny(zeinfo.data(), zeinfo.size())); + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)}); + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)}); + + const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'}; + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data); + + const uint32_t indirectAccessBufferMajorVersion = 2u; + + { + NEO::Elf::ElfNoteSection elfNoteSection; + + elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion; + elfNoteSection.descSize = sizeof(uint32_t); + elfNoteSection.nameSize = 8u; + + std::vector descData; + uint8_t indirectAccessBufferData[4]; + memcpy_s(indirectAccessBufferData, 4, &indirectAccessBufferMajorVersion, 4); + descData.push_back(indirectAccessBufferData); + + const auto sectionDataSize = sizeof(NEO::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize; + + auto noteIntelGTSectionData = std::make_unique(sectionDataSize); + memcpy_s(noteIntelGTSectionData.get(), sizeof(NEO::Elf::ElfNoteSection), &elfNoteSection, sizeof(NEO::Elf::ElfNoteSection)); + auto offset = sizeof(NEO::Elf::ElfNoteSection); + memcpy_s(reinterpret_cast(ptrOffset(noteIntelGTSectionData.get(), offset)), elfNoteSection.nameSize, Zebin::Elf::intelGTNoteOwnerName.str().c_str(), elfNoteSection.nameSize); + offset += elfNoteSection.nameSize; + memcpy_s(ptrOffset(noteIntelGTSectionData.get(), offset), elfNoteSection.descSize, descData[0], elfNoteSection.descSize); + zebin.appendSection(NEO::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef::fromAny(noteIntelGTSectionData.get(), sectionDataSize)); + } + + NEO::ProgramInfo programInfo; + NEO::SingleDeviceBinary singleBinary; + singleBinary.deviceBinary = zebin.storage; + singleBinary.generatorFeatureVersions.indirectAccessBuffer = 2; + std::string errors; + std::string warnings; + auto error = NEO::decodeSingleDeviceBinary(programInfo, singleBinary, errors, warnings, gfxCoreHelper); + EXPECT_EQ(NEO::DecodeError::success, error); + EXPECT_TRUE(warnings.empty()) << warnings; + EXPECT_TRUE(errors.empty()) << errors; + + ASSERT_EQ(2u, programInfo.kernelInfos.size()); + EXPECT_TRUE(programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.flags.usesPrintf); + EXPECT_FALSE(programInfo.kernelInfos[1]->kernelDescriptor.kernelAttributes.flags.usesPrintf); +} + TEST_F(IntelGTNotesFixture, WhenGettingIntelGTNotesGivenValidIntelGTNotesSectionThenReturnsIntelGTNotes) { std::vector elfNoteSections; size_t numNotes = 5; @@ -6327,7 +6352,7 @@ TEST_F(IntelGTNotesFixture, GivenValidTargetDeviceAndNoteWithUnrecognizedTypeWhe EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - auto validationRes = validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + auto validationRes = validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_TRUE(validationRes); EXPECT_TRUE(outErrReason.empty()); @@ -6385,7 +6410,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenValidTargetDeviceAndV EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST_F(IntelGTNotesFixture, givenAotConfigInIntelGTNotesSectionWhenValidatingTargetDeviceThenUseOnlyItForValidation) { @@ -6417,7 +6442,7 @@ TEST_F(IntelGTNotesFixture, givenAotConfigInIntelGTNotesSectionWhenValidatingTar EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST_F(IntelGTNotesFixture, givenRequestedTargetDeviceWithApplyValidationWorkaroundFlagSetToTrueWhenValidatingDeviceBinaryThenDoNotUseProductConfigForValidation) { @@ -6465,7 +6490,7 @@ TEST_F(IntelGTNotesFixture, givenRequestedTargetDeviceWithApplyValidationWorkaro EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST(ValidateTargetDevice32BitZebin, Given32BitZebinAndValidIntelGTNotesWhenValidatingTargetDeviceThenReturnTrue) { @@ -6492,7 +6517,7 @@ TEST(ValidateTargetDevice32BitZebin, Given32BitZebinAndValidIntelGTNotesWhenVali EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST(ValidateTargetDeviceGeneratorZebin, GivenZebinAndValidIntelGTNotesWithGeneratorIdWhenValidatingTargetDeviceThenGeneratorIsSetCorrectly) { @@ -6522,7 +6547,7 @@ TEST(ValidateTargetDeviceGeneratorZebin, GivenZebinAndValidIntelGTNotesWithGener EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_TRUE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); bool isIgcGeneratedExpectation = static_cast(generatorId); bool isIgcGenerated = static_cast(singleDeviceBinary.generator); @@ -6543,7 +6568,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenValidTargetDeviceAndN EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenInvalidTargetDeviceAndValidNotesThenReturnFalse) { @@ -6596,7 +6621,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenInvalidTargetDeviceAn EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenValidTargetDeviceAndInvalidNoteTypeThenReturnFalse) { @@ -6622,7 +6647,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenValidTargetDeviceAndI EXPECT_TRUE(outErrReason.empty()); SingleDeviceBinary singleDeviceBinary{}; - EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary)); + EXPECT_FALSE(validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator)); } TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenInvalidIntelGTNotesSecionSizeWhichWilLCauseOOBAccessThenReturnFalse) { @@ -6648,7 +6673,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenInvalidIntelGTNotesSe SingleDeviceBinary singleDeviceBinary{}; TargetDevice targetDevice; - auto result = validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + auto result = validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_FALSE(result); EXPECT_TRUE(outWarning.empty()); auto errStr{"DeviceBinaryFormat::zebin : Offsetting will cause out-of-bound memory read! Section size: " + std::to_string(incorrectSectionDataSize) + @@ -6677,7 +6702,7 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenValidZeInfoVersionInI SingleDeviceBinary singleDeviceBinary{}; TargetDevice targetDevice; - validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_TRUE(outErrReason.empty()); } @@ -6702,17 +6727,17 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenIndirectDetectionVers SingleDeviceBinary singleDeviceBinary{}; TargetDevice targetDevice; - validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_TRUE(outErrReason.empty()); EXPECT_EQ(indirectDetectionVersion, singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection); NEO::MockExecutionEnvironment mockExecutionEnvironment{}; auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); NEO::ProgramInfo programInfo; - ZebinTestData::ValidEmptyProgram zebin; singleDeviceBinary.deviceBinary = {zebin.storage.data(), zebin.storage.size()}; std::string errors; std::string warnings; + zebin.elfHeader->machine = productFamily; auto error = NEO::decodeSingleDeviceBinary(programInfo, singleDeviceBinary, errors, warnings, gfxCoreHelper); EXPECT_EQ(NEO::DecodeError::success, error); EXPECT_TRUE(warnings.empty()) << warnings; @@ -6721,31 +6746,70 @@ TEST_F(IntelGTNotesFixture, WhenValidatingTargetDeviceGivenIndirectDetectionVers } TEST_F(IntelGTNotesFixture, GivenIndirectAccessBufferVersionInIntelGTNotesWhenValidatingTargetDeviceThenVersionIsPopulatedCorrectly) { + + MockExecutionEnvironment executionEnvironment; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + auto &compilerProductHelper = rootDeviceEnvironment.getHelper(); + compilerProductHelper.adjustHwInfoForIgc(hwInfo); + TargetDevice targetDevice = getTargetDevice(rootDeviceEnvironment); + if (zebin.elfHeader->identity.eClass == NEO::Elf::EI_CLASS_64) { + targetDevice.maxPointerSizeInBytes = 8; + } const uint32_t indirectAccessBufferMajorVersion = 4u; - Zebin::Elf::ElfNoteSection elfNoteSection = {}; - elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion; - elfNoteSection.descSize = sizeof(uint32_t); - elfNoteSection.nameSize = 8u; + { + std::vector elfNoteSections; + for (int i = 0; i < 2; i++) { + auto &inserted = elfNoteSections.emplace_back(); + inserted.descSize = 4u; + inserted.nameSize = 8u; + } - auto sectionDataSize = sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + alignUp(elfNoteSection.descSize, 4); - auto noteIntelGTSectionData = std::make_unique(sectionDataSize); - appendSingleIntelGTSectionData(elfNoteSection, noteIntelGTSectionData.get(), reinterpret_cast(&indirectAccessBufferMajorVersion), - Zebin::Elf::intelGTNoteOwnerName.str().c_str(), sectionDataSize); - zebin.appendSection(Zebin::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef::fromAny(noteIntelGTSectionData.get(), sectionDataSize)); + elfNoteSections.at(0).type = Zebin::Elf::IntelGTSectionType::productFamily; + elfNoteSections.at(1).type = Zebin::Elf::IntelGTSectionType::productConfig; + + elfNoteSections.emplace_back(); + elfNoteSections.at(2).type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion; + elfNoteSections.at(2).descSize = sizeof(uint32_t); + elfNoteSections.at(2).nameSize = 8u; + + std::vector descData; + + uint8_t productFamilyData[4]; + memcpy_s(productFamilyData, 4, &targetDevice.productFamily, 4); + descData.push_back(productFamilyData); + + uint8_t productConfigData[4]; + memcpy_s(productConfigData, 4, &targetDevice.aotConfig.value, 4); + descData.push_back(productConfigData); + + uint8_t indirectAccessBufferData[4]; + memcpy_s(indirectAccessBufferData, 4, &indirectAccessBufferMajorVersion, 4); + descData.push_back(indirectAccessBufferData); + + const auto sectionDataSize = std::accumulate(elfNoteSections.begin(), elfNoteSections.end(), size_t{0u}, + [](auto totalSize, const auto &elfNoteSection) { + return totalSize + sizeof(NEO::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize; + }); + auto noteIntelGTSectionData = std::make_unique(sectionDataSize); + appendIntelGTSectionData(elfNoteSections, noteIntelGTSectionData.get(), descData, sectionDataSize); + zebin.appendSection(NEO::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef::fromAny(noteIntelGTSectionData.get(), sectionDataSize)); + } std::string outErrReason, outWarning; auto elf = Zebin::Elf::decodeElf(zebin.storage, outErrReason, outWarning); SingleDeviceBinary singleDeviceBinary{}; - TargetDevice targetDevice; - validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + singleDeviceBinary.targetDevice = targetDevice; + + validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_EQ(indirectAccessBufferMajorVersion, singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer); NEO::MockExecutionEnvironment mockExecutionEnvironment{}; auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); NEO::ProgramInfo programInfo; - ZebinTestData::ValidEmptyProgram zebin; singleDeviceBinary.deviceBinary = {zebin.storage.data(), zebin.storage.size()}; + std::string errors; std::string warnings; auto error = NEO::decodeSingleDeviceBinary(programInfo, singleDeviceBinary, errors, warnings, gfxCoreHelper); @@ -6797,7 +6861,7 @@ TEST_F(IntelGTNotesFixture, GivenInvalidVersioningWhenValidatingTargetDeviceThen SingleDeviceBinary singleDeviceBinary{}; TargetDevice targetDevice; - validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_TRUE(outWarning.empty()); EXPECT_STREQ("DeviceBinaryFormat::zebin::.ze_info : Invalid version format - expected 'MAJOR.MINOR' string, got : .11\n", outErrReason.c_str()); } @@ -6821,7 +6885,7 @@ TEST_F(IntelGTNotesFixture, GivenIncompatibleVersioningWhenValidatingTargetDevic SingleDeviceBinary singleDeviceBinary{}; TargetDevice targetDevice; - validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary); + validateTargetDevice(elf, targetDevice, outErrReason, outWarning, singleDeviceBinary.generatorFeatureVersions, singleDeviceBinary.generator); EXPECT_TRUE(outWarning.empty()); EXPECT_STREQ("DeviceBinaryFormat::zebin::.ze_info : Unhandled major version : 2, decoder is at : 1\n", outErrReason.c_str()); } diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp index 5c2aa399d1..ca30bd876a 100644 --- a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp +++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp @@ -111,6 +111,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP void *outImplicitArgs = nullptr; KernelDescriptor kernelDescriptor{}; + kernelDescriptor.kernelAttributes.simdSize = 32; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 0; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 2; @@ -129,34 +130,35 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t); auto localIdsOffset = alignUp(localIdsPatchingSize, MemoryConstants::cacheLineSize); - auto memoryToPatch = std::make_unique(totalSizeForPatching); + auto memoryToPatch = std::make_unique(totalSizeForPatching + 64); + auto alignedMemory = alignUp(memoryToPatch.get(), 64); uint8_t pattern = 0xcd; - memset(memoryToPatch.get(), pattern, totalSizeForPatching); + memset(alignedMemory, pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(alignedMemory, implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); - EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); + EXPECT_EQ(retVal, ptrOffset(alignedMemory, totalSizeForPatching)); - void *expectedImplicitArgsPtr = ptrOffset(memoryToPatch.get(), localIdsOffset); + void *expectedImplicitArgsPtr = ptrOffset(alignedMemory, localIdsOffset); EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs); uint32_t offset = 0; for (; offset < localIdsPatchingSize; offset++) { - EXPECT_NE(pattern, memoryToPatch.get()[offset]) << offset; + EXPECT_NE(pattern, alignedMemory[offset]) << offset; } - + offset = static_cast(localIdsOffset); for (; offset < totalSizeForPatching - ImplicitArgsV0::getAlignedSize(); offset++) { - EXPECT_EQ(pattern, memoryToPatch.get()[offset]); + EXPECT_EQ(pattern, alignedMemory[offset]); } for (; offset < totalSizeForPatching - (ImplicitArgsV0::getAlignedSize() - ImplicitArgsV0::getSize()); offset++) { - EXPECT_NE(pattern, memoryToPatch.get()[offset]); + EXPECT_NE(pattern, alignedMemory[offset]); } for (; offset < totalSizeForPatching; offset++) { - EXPECT_EQ(pattern, memoryToPatch.get()[offset]); + EXPECT_EQ(pattern, alignedMemory[offset]); } } @@ -180,27 +182,27 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl EXPECT_EQ(ImplicitArgsV0::getAlignedSize(), totalSizeForPatching); - auto memoryToPatch = std::make_unique(totalSizeForPatching); - + auto memoryToPatch = std::make_unique(totalSizeForPatching + 64); + auto alignedMemory = alignUp(memoryToPatch.get(), 64); uint8_t pattern = 0xcd; - memset(memoryToPatch.get(), pattern, totalSizeForPatching); + memset(alignedMemory, pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(alignedMemory, implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); - EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); + EXPECT_EQ(retVal, ptrOffset(alignedMemory, totalSizeForPatching)); - void *expectedImplicitArgsPtr = memoryToPatch.get(); + void *expectedImplicitArgsPtr = alignedMemory; EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs); uint32_t offset = 0; for (; offset < ImplicitArgsV0::getSize(); offset++) { - EXPECT_NE(pattern, memoryToPatch.get()[offset]); + EXPECT_NE(pattern, alignedMemory[offset]); } for (; offset < totalSizeForPatching; offset++) { - EXPECT_EQ(pattern, memoryToPatch.get()[offset]); + EXPECT_EQ(pattern, alignedMemory[offset]); } }