diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index a542b9304b..ff2c7a4eed 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -101,6 +101,12 @@ ze_result_t CommandListCoreFamily::reset() { clearCommandsToPatch(); commandListSLMEnabled = false; + if (device->isImplicitScalingCapable() && !this->internalUsage) { + this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); + } else { + this->partitionCount = 1; + } + if (!isCopyOnly()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { programStateBaseAddress(commandContainer, false); @@ -113,11 +119,6 @@ ze_result_t CommandListCoreFamily::reset() { device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc); } this->ownedPrivateAllocations.clear(); - if (device->isImplicitScalingCapable() && !this->internalUsage) { - this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); - } else { - this->partitionCount = 1; - } return ZE_RESULT_SUCCESS; } @@ -130,6 +131,10 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->engineGroupType = engineGroupType; this->flags = flags; + if (device->isImplicitScalingCapable() && !this->internalUsage) { + this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); + } + if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { this->isFlushTaskSubmissionEnabled = NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled); @@ -151,10 +156,6 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO } } - if (device->isImplicitScalingCapable() && !this->internalUsage) { - this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); - } - return returnType; } @@ -2321,7 +2322,7 @@ void CommandListCoreFamily::programStateBaseAddress(NEO::CommandC NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); STATE_BASE_ADDRESS sba; - NEO::EncodeStateBaseAddress::encode(commandContainer, sba); + NEO::EncodeStateBaseAddress::encode(commandContainer, sba, this->partitionCount > 1); if (NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger()) { NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.BindlessSurfaceStateBaseAddress = sba.getBindlessSurfaceStateBaseAddress(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 1b1f89f986..7a4440aa22 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,6 +11,7 @@ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -66,6 +67,40 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd EXPECT_EQ(ssh->getHeapGpuBase(), cmdSba->getSurfaceStateBaseAddress()); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); + + EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); + EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); +} + +using MultiTileCommandListTests = Test>; +HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); + EXPECT_EQ(2u, commandList->partitionCount); + auto &commandContainer = commandList->commandContainer; + + ASSERT_NE(nullptr, commandContainer.getCommandStream()); + auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); + + auto result = commandList->close(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + auto itorSba = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorSba); + + auto cmdSba = genCmdCast(*itorSba); + + EXPECT_FALSE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); + EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } using CommandListTestsReserveSize = Test; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 7861d78dde..695df313bb 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -226,8 +226,8 @@ struct EncodeMediaInterfaceDescriptorLoad { template struct EncodeStateBaseAddress { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; - static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd); - static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics); + static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable); + static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable); static void setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd); static size_t getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container); }; diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 3755381f32..56f4917e0b 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -177,7 +177,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sba, statelessMocsIndex, false); + EncodeStateBaseAddress::encode(container, sba, statelessMocsIndex, false, false); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } @@ -378,14 +378,14 @@ void EncodeStateBaseAddress::setIohAddressForDebugger(NEO::Debugger::Sba } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) { +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics) { +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { if (container.isAnyHeapDirty()) { EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true); } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 3b76124ffb..a6f5ca22f6 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -213,7 +213,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics, args.partitionCount > 1); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } @@ -473,17 +473,15 @@ void EncodeStateBaseAddress::setIohAddressForDebugger(NEO::Debugger::Sba } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) { +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics) { +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); - bool multiOsContextCapable = - ImplicitScalingHelper::isImplicitScalingEnabled(container.getDevice()->getDeviceBitfield(), true); StateBaseAddressHelper::programStateBaseAddress( &sbaCmd, diff --git a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp index a6c1c6e71b..44ae10826c 100644 --- a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp @@ -80,7 +80,7 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi { STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(cmdContainer, sba); + EncodeStateBaseAddress::encode(cmdContainer, sba, false); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()); @@ -94,7 +94,7 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi static_cast(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS; STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(cmdContainer, sba); + EncodeStateBaseAddress::encode(cmdContainer, sba, false); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index e820240f60..c2b54b1d80 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -288,7 +288,7 @@ HWTEST2_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetSta cmdContainer->setHeapDirty(NEO::HeapType::SURFACE_STATE); STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(*cmdContainer.get(), sba); + EncodeStateBaseAddress::encode(*cmdContainer.get(), sba, false); auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE); @@ -318,7 +318,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenSetStateBaseAddressCa cmdContainer->dirtyHeaps = 0; STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(*cmdContainer.get(), sba); + EncodeStateBaseAddress::encode(*cmdContainer.get(), sba, false); auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE);