diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index b618cf44d5..42d82d18e9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2158,17 +2158,16 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo); if (!containsAnyKernel) { - requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, device->getHwInfo()); + requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, device->getHwInfo()); finalStreamState = requiredStreamState; containsAnyKernel = true; } - finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, hwInfo); + finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, hwInfo); if (finalStreamState.frontEndState.isDirty()) { auto pVfeStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); auto pVfeState = new VFE_STATE_TYPE; - NEO::PreambleHelper::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), - NEO::AdditionalKernelExecInfo::NotApplicable, finalStreamState); + NEO::PreambleHelper::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState); commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState}); } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 8255cc614a..c24b13e7cd 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -336,6 +336,7 @@ ze_result_t CommandQueueHw::executeCommandLists( if (!isCopyOnlyCommandQueue) { auto &requiredStreamState = commandList->getRequiredStreamState(); streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState); + streamProperties.frontEndState.singleSliceDispatchCcsMode.value = csr->getOsContext().isEngineInstanced(); auto programVfe = streamProperties.frontEndState.isDirty(); if (frontEndStateDirty) { programVfe = true; @@ -346,6 +347,7 @@ ze_result_t CommandQueueHw::executeCommandLists( } auto &finalStreamState = commandList->getFinalStreamState(); streamProperties.frontEndState.setProperties(finalStreamState.frontEndState); + streamProperties.frontEndState.singleSliceDispatchCcsMode.value = csr->getOsContext().isEngineInstanced(); } patchCommands(*commandList, scratchSpaceController->getScratchPatchAddress()); @@ -437,7 +439,6 @@ void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uin perThreadScratchSpaceSize, scratchAddress, device->getMaxNumHwThreads(), - NEO::AdditionalKernelExecInfo::NotApplicable, streamProperties); csr->setMediaVFEStateDirty(false); } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index 826f90d4b6..002c7aee96 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -135,12 +135,18 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress); + CFE_STATE *cfeStateCmd = nullptr; + auto &commandsToPatch = commandList.getCommandsToPatch(); for (auto &commandToPatch : commandsToPatch) { switch (commandToPatch.type) { case CommandList::CommandToPatch::FrontEndState: - reinterpret_cast(commandToPatch.pCommand)->setScratchSpaceBuffer(lowScratchAddress); - *reinterpret_cast(commandToPatch.pDestination) = *reinterpret_cast(commandToPatch.pCommand); + cfeStateCmd = reinterpret_cast(commandToPatch.pCommand); + + cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress); + cfeStateCmd->setSingleSliceDispatchCcsMode(streamProperties.frontEndState.singleSliceDispatchCcsMode.value); + + *reinterpret_cast(commandToPatch.pDestination) = *cfeStateCmd; break; default: UNRECOVERABLE_IF(true); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index e40aa69bf0..82b599c020 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -17,6 +17,7 @@ #include "shared/test/common/mocks/mock_bindless_heaps_helper.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" @@ -1134,6 +1135,100 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen commandQueue->destroy(); } +struct EngineInstancedDeviceExecuteTests : public ::testing::Test { + void SetUp() override { + DebugManager.flags.EngineInstancedSubDevices.set(true); + } + + bool createDevices(uint32_t numGenericSubDevices, uint32_t numCcs) { + DebugManager.flags.CreateMultipleSubDevices.set(numGenericSubDevices); + + auto executionEnvironment = std::make_unique(); + executionEnvironment->prepareRootDeviceEnvironments(1); + + executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); + auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); + hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = numCcs; + hwInfo->featureTable.ftrCCSNode = (numCcs > 0); + HwHelper::get(hwInfo->platform.eRenderCoreFamily).adjustDefaultEngineType(hwInfo); + + if (!multiCcsDevice(*hwInfo, numCcs)) { + return false; + } + executionEnvironment->parseAffinityMask(); + deviceFactory = std::make_unique(1, numGenericSubDevices, *executionEnvironment.release()); + rootDevice = deviceFactory->rootDevices[0]; + EXPECT_NE(nullptr, rootDevice); + + return true; + } + + bool multiCcsDevice(const HardwareInfo &hwInfo, uint32_t expectedNumCcs) { + auto gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); + + uint32_t numCcs = 0; + + for (auto &engine : gpgpuEngines) { + if (EngineHelpers::isCcs(engine.first) && (engine.second == EngineUsage::Regular)) { + numCcs++; + } + } + + return (numCcs == expectedNumCcs); + } + + DebugManagerStateRestore restorer; + std::unique_ptr deviceFactory; + MockDevice *rootDevice = nullptr; +}; + +HWTEST2_F(EngineInstancedDeviceExecuteTests, givenEngineInstancedDeviceWhenExecutingThenEnableSingleSliceDispatch, IsAtLeastXeHpCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + constexpr uint32_t genericDevicesCount = 1; + constexpr uint32_t ccsCount = 2; + + if (!createDevices(genericDevicesCount, ccsCount)) { + GTEST_SKIP(); + } + + auto subDevice = static_cast(rootDevice->getDeviceById(0)); + auto defaultEngine = subDevice->getDefaultEngine(); + EXPECT_TRUE(defaultEngine.osContext->isEngineInstanced()); + + std::vector> devices; + devices.push_back(std::unique_ptr(subDevice)); + + auto driverHandle = std::make_unique>(); + driverHandle->initialize(std::move(devices)); + + auto l0Device = driverHandle->devices[0]; + + ze_command_queue_desc_t desc = {}; + NEO::CommandStreamReceiver *csr; + l0Device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, l0Device, csr, &desc, false, false, returnValue)); + auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, l0Device, NEO::EngineGroupType::Compute, 0u, returnValue))); + auto commandListHandle = commandList->toHandle(); + + commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + + GenCmdList cmdList; + FamilyType::PARSE::parseCommandBuffer(cmdList, commandQueue->commandStream->getCpuBase(), commandQueue->commandStream->getUsed()); + + auto cfeStates = findAll(cmdList.begin(), cmdList.end()); + + EXPECT_NE(0u, cfeStates.size()); + + for (auto &cmd : cfeStates) { + auto cfeState = reinterpret_cast(*cmd); + EXPECT_TRUE(cfeState->getSingleSliceDispatchCcsMode()); + } + + commandQueue->destroy(); +} + HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsWithPTSSsetForFirstCmdListThenMVSAndGSBAAreProgrammedOnlyOnce, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; diff --git a/opencl/source/device_queue/device_queue_hw_bdw_and_later.inl b/opencl/source/device_queue/device_queue_hw_bdw_and_later.inl index 39ad88c5ae..5e5780ab36 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_and_later.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_and_later.inl @@ -135,8 +135,7 @@ void DeviceQueueHw::addMediaStateClearCmds() { auto pVfeState = PreambleHelper::getSpaceForVfeState(&slbCS, device->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeState, device->getHardwareInfo(), 0u, 0, device->getSharedDeviceInfo().maxFrontEndThreads, - AdditionalKernelExecInfo::NotApplicable, emptyProperties); + PreambleHelper::programVfeState(pVfeState, device->getHardwareInfo(), 0u, 0, device->getSharedDeviceInfo().maxFrontEndThreads, emptyProperties); } template diff --git a/opencl/test/unit_test/device/sub_device_tests.cpp b/opencl/test/unit_test/device/sub_device_tests.cpp index cba3c4705d..a3531756de 100644 --- a/opencl/test/unit_test/device/sub_device_tests.cpp +++ b/opencl/test/unit_test/device/sub_device_tests.cpp @@ -9,14 +9,18 @@ #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/source/cl_device/cl_device.h" +#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" +#include "test.h" using namespace NEO; @@ -695,6 +699,33 @@ TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSingle2rdLevelDeviceOnlyW EXPECT_TRUE(hasEngineInstancedEngines(rootDevice, engineType)); } +HWTEST2_F(EngineInstancedDeviceTests, givenEngineInstancedDeviceWhenProgrammingCfeStateThenSetSingleSliceDispatch, IsAtLeastXeHpCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + constexpr uint32_t genericDevicesCount = 1; + constexpr uint32_t ccsCount = 2; + + if (!createDevices(genericDevicesCount, ccsCount)) { + GTEST_SKIP(); + } + + auto subDevice = static_cast(rootDevice->getDeviceById(0)); + auto defaultEngine = subDevice->getDefaultEngine(); + EXPECT_TRUE(defaultEngine.osContext->isEngineInstanced()); + + char buffer[64] = {}; + MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer)); + LinearStream linearStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); + + auto csr = static_cast *>(defaultEngine.commandStreamReceiver); + auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + csr->programVFEState(linearStream, dispatchFlags, 1); + + auto cfeState = reinterpret_cast(buffer); + EXPECT_TRUE(cfeState->getSingleSliceDispatchCcsMode()); +} + TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized) { auto device = std::make_unique(); device->initializeRootCommandStreamReceiver(); diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index d4bd3b76e7..9822fb84aa 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -143,7 +143,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPPlus, givenScratchEnabledWhenPr uint32_t expectedMaxThreads = HwHelper::getMaxThreadsForVfe(*defaultHwInfo); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, expectedMaxThreads, AdditionalKernelExecInfo::NotApplicable, emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, expectedMaxThreads, emptyProperties); parseCommands(linearStream); @@ -171,7 +171,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPPlus, givenNotSetDebugFlagWhenP uint32_t expectedMaxThreads = HwHelper::getMaxThreadsForVfe(*defaultHwInfo); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, expectedMaxThreads, AdditionalKernelExecInfo::NotApplicable, emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, expectedMaxThreads, emptyProperties); uint32_t maximumNumberOfThreads = cfeState->getMaximumNumberOfThreads(); EXPECT_EQ(numberOfWalkers, cfeState->getNumberOfWalkers()); @@ -198,7 +198,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPPlus, givenSetDebugFlagWhenPrea uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, 16u, AdditionalKernelExecInfo::NotApplicable, emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, 16u, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index 41e3745cc9..beb764296b 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -48,6 +48,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::programPerDssBackedBuffer; using BaseClass::programPreamble; using BaseClass::programStateSip; + using BaseClass::programVFEState; using BaseClass::requiresInstructionCacheFlush; using BaseClass::rootDeviceIndex; using BaseClass::sshState; diff --git a/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp b/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp index 463b07b0e8..0dee858b6b 100644 --- a/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp +++ b/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp @@ -23,8 +23,8 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetFal auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; - streamProperties.frontEndState.setProperties(false, false, *defaultHwInfo); - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties); + streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); @@ -39,8 +39,8 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetTru auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; - streamProperties.frontEndState.setProperties(false, false, *defaultHwInfo); - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties); + streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); @@ -56,8 +56,8 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetFals auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; - streamProperties.frontEndState.setProperties(false, false, *defaultHwInfo); - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties); + streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); @@ -72,8 +72,8 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrue auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; - streamProperties.frontEndState.setProperties(false, false, *defaultHwInfo); - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties); + streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 2c7b97f2e3..0ce4bed8b6 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -937,10 +937,10 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent, - disableOverdispatch, hwInfo); + disableOverdispatch, osContext->isEngineInstanced(), hwInfo); PreambleHelper::programVfeState( pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(), - maxFrontEndThreads, lastAdditionalKernelExecInfo, streamProperties); + maxFrontEndThreads, streamProperties); auto commandOffset = PreambleHelper::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl index cd53e569fc..d0fa3d2254 100644 --- a/shared/source/command_stream/definitions/stream_properties.inl +++ b/shared/source/command_stream/definitions/stream_properties.inl @@ -21,8 +21,9 @@ struct StateComputeModeProperties { struct FrontEndProperties { StreamProperty disableOverdispatch{}; + StreamProperty singleSliceDispatchCcsMode{}; - void setProperties(bool isCooperativeKernel, bool disableOverdispatch, const HardwareInfo &hwInfo); + void setProperties(bool isCooperativeKernel, bool disableOverdispatch, bool engineInstancedDevice, const HardwareInfo &hwInfo); void setProperties(const FrontEndProperties &properties); bool isDirty(); void clearIsDirty(); diff --git a/shared/source/command_stream/stream_properties.cpp b/shared/source/command_stream/stream_properties.cpp index a468aa727d..f79ca1beb1 100644 --- a/shared/source/command_stream/stream_properties.cpp +++ b/shared/source/command_stream/stream_properties.cpp @@ -37,22 +37,25 @@ void StateComputeModeProperties::clearIsDirty() { largeGrfMode.isDirty = false; } -void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableOverdispatch, const HardwareInfo &hwInfo) { +void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableOverdispatch, bool engineInstancedDevice, const HardwareInfo &hwInfo) { clearIsDirty(); - this->disableOverdispatch.set(disableOverdispatch ? 1 : 0); + this->disableOverdispatch.set(disableOverdispatch); + this->singleSliceDispatchCcsMode.set(engineInstancedDevice); } void FrontEndProperties::setProperties(const FrontEndProperties &properties) { clearIsDirty(); disableOverdispatch.set(properties.disableOverdispatch.value); + singleSliceDispatchCcsMode.set(properties.singleSliceDispatchCcsMode.value); } bool FrontEndProperties::isDirty() { - return disableOverdispatch.isDirty; + return disableOverdispatch.isDirty || singleSliceDispatchCcsMode.isDirty; } void FrontEndProperties::clearIsDirty() { disableOverdispatch.isDirty = false; + singleSliceDispatchCcsMode.isDirty = false; } diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index d75d12c3ab..ff364f8655 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -40,7 +40,7 @@ struct PreambleHelper { static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr); static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, EngineGroupType engineGroupType); - static void appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd); + static void appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd); static void *getSpaceForVfeState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, EngineGroupType engineGroupType); @@ -49,7 +49,6 @@ struct PreambleHelper { uint32_t scratchSize, uint64_t scratchAddress, uint32_t maxFrontEndThreads, - uint32_t additionalExecInfo, const StreamProperties &streamProperties); static uint64_t getScratchSpaceAddressOffsetForVfeState(LinearStream *pCommandStream, void *pVfeState); static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo); diff --git a/shared/source/helpers/preamble_base.inl b/shared/source/helpers/preamble_base.inl index cb856cf962..42ce0429b0 100644 --- a/shared/source/helpers/preamble_base.inl +++ b/shared/source/helpers/preamble_base.inl @@ -117,7 +117,7 @@ void PreambleHelper::programAdditionalFieldsInVfeState(VFE_STATE_TYPE } template -void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd) {} +void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd) {} template uint32_t PreambleHelper::getScratchSizeValueToProgramMediaVfeState(uint32_t scratchSize) { diff --git a/shared/source/helpers/preamble_bdw_and_later.inl b/shared/source/helpers/preamble_bdw_and_later.inl index c1c7cda28c..de144aba2f 100644 --- a/shared/source/helpers/preamble_bdw_and_later.inl +++ b/shared/source/helpers/preamble_bdw_and_later.inl @@ -42,7 +42,6 @@ void PreambleHelper::programVfeState(void *pVfeState, uint32_t scratchSize, uint64_t scratchAddress, uint32_t maxFrontEndThreads, - uint32_t additionalExecInfo, const StreamProperties &streamProperties) { using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; @@ -59,7 +58,7 @@ void PreambleHelper::programVfeState(void *pVfeState, cmd.setScratchSpaceBasePointerHigh(highAddress); programAdditionalFieldsInVfeState(&cmd, hwInfo); - appendProgramVFEState(hwInfo, streamProperties, additionalExecInfo, &cmd); + appendProgramVFEState(hwInfo, streamProperties, &cmd); *pMediaVfeState = cmd; } diff --git a/shared/source/helpers/preamble_xehp_and_later.inl b/shared/source/helpers/preamble_xehp_and_later.inl index 39590e1a9a..57893b3b0f 100644 --- a/shared/source/helpers/preamble_xehp_and_later.inl +++ b/shared/source/helpers/preamble_xehp_and_later.inl @@ -80,7 +80,7 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { return 0u; } template <> -void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd); +void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd); template <> void *PreambleHelper::getSpaceForVfeState(LinearStream *pCommandStream, @@ -96,7 +96,6 @@ void PreambleHelper::programVfeState(void *pVfeState, uint32_t scratchSize, uint64_t scratchAddress, uint32_t maxFrontEndThreads, - uint32_t additionalKernelExecInfo, const StreamProperties &streamProperties) { using CFE_STATE = typename Family::CFE_STATE; @@ -108,7 +107,7 @@ void PreambleHelper::programVfeState(void *pVfeState, uint32_t lowAddress = uint32_t(0xFFFFFFFF & scratchAddress); cmd.setScratchSpaceBuffer(lowAddress); cmd.setMaximumNumberOfThreads(maxFrontEndThreads); - appendProgramVFEState(hwInfo, streamProperties, additionalKernelExecInfo, &cmd); + appendProgramVFEState(hwInfo, streamProperties, &cmd); if (DebugManager.flags.CFENumberOfWalkers.get() != -1) { cmd.setNumberOfWalkers(DebugManager.flags.CFENumberOfWalkers.get()); diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp index 545e3e8cab..b4d4be6ffa 100644 --- a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -17,10 +17,11 @@ using Family = XeHpFamily; namespace NEO { template <> -void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd) { +void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd) { auto command = static_cast(cmd); command->setComputeOverdispatchDisable(streamProperties.frontEndState.disableOverdispatch.value == 1); + command->setSingleSliceDispatchCcsMode(streamProperties.frontEndState.singleSliceDispatchCcsMode.value); if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) { command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get()); diff --git a/shared/test/common/gen11/test_preamble_gen11.cpp b/shared/test/common/gen11/test_preamble_gen11.cpp index 9b0712f6ca..a07f620cb3 100644 --- a/shared/test/common/gen11/test_preamble_gen11.cpp +++ b/shared/test/common/gen11/test_preamble_gen11.cpp @@ -61,9 +61,7 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, emptyProperties); parseCommands(cs); @@ -83,9 +81,7 @@ GEN11TEST_F(Gen11PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammi LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, emptyProperties); parseCommands(cs); diff --git a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp index 4ddd6f8204..ff956549f0 100644 --- a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp @@ -65,9 +65,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, emptyProperties); parseCommands(cs); @@ -89,9 +87,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenCcsEngineWhenWaIsSetThenAppropriatePipeC auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::Compute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, emptyProperties); parseCommands(cs); @@ -112,9 +108,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenRcsEngineWhenWaIsSetThenAppropriatePipeC auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 672u, emptyProperties); parseCommands(cs); diff --git a/shared/test/common/gen8/test_preamble_gen8.cpp b/shared/test/common/gen8/test_preamble_gen8.cpp index 1720f3dd18..6f5b810259 100644 --- a/shared/test/common/gen8/test_preamble_gen8.cpp +++ b/shared/test/common/gen8/test_preamble_gen8.cpp @@ -98,8 +98,7 @@ BDWTEST_F(PreambleVfeState, WhenProgrammingVfeStateThenProgrammingIsCorrect) { LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 168u, - AdditionalKernelExecInfo::NotApplicable, emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 168u, emptyProperties); parseCommands(cs); diff --git a/shared/test/common/gen9/skl/test_preamble_skl.cpp b/shared/test/common/gen9/skl/test_preamble_skl.cpp index 1d7ab03cba..bdbf01af49 100644 --- a/shared/test/common/gen9/skl/test_preamble_skl.cpp +++ b/shared/test/common/gen9/skl/test_preamble_skl.cpp @@ -110,9 +110,7 @@ GEN9TEST_F(PreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgrammingIsC LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, emptyProperties); parseCommands(cs); @@ -132,9 +130,7 @@ GEN9TEST_F(PreambleVfeState, GivenWaOnWhenProgrammingVfeStateThenProgrammingIsCo LinearStream &cs = linearStream; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pPlatform->getClDevice(0)->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, - AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, pPlatform->getClDevice(0)->getHardwareInfo(), 0u, 0, 168u, emptyProperties); parseCommands(cs); diff --git a/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp b/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp index 51de4d9a57..ae804f4aa2 100644 --- a/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp +++ b/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp @@ -105,9 +105,7 @@ XEHPTEST_F(XeHPPreambleVfeState, WhenProgramVFEStateIsCalledThenCorrectCfeStateA auto pCfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pCfeCmd, *defaultHwInfo, 1024u, addressToPatch, - 10u, AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pCfeCmd, *defaultHwInfo, 1024u, addressToPatch, 10u, emptyProperties); EXPECT_GE(reinterpret_cast(pCfeCmd), reinterpret_cast(preambleStream.getCpuBase())); EXPECT_LT(reinterpret_cast(pCfeCmd), reinterpret_cast(preambleStream.getCpuBase()) + preambleStream.getUsed()); diff --git a/shared/test/unit_test/command_stream/stream_properties_tests.cpp b/shared/test/unit_test/command_stream/stream_properties_tests.cpp index 2036f24f51..88ad937cf2 100644 --- a/shared/test/unit_test/command_stream/stream_properties_tests.cpp +++ b/shared/test/unit_test/command_stream/stream_properties_tests.cpp @@ -23,6 +23,7 @@ std::vector getAllStateComputeModeProperties(StateComputeModeP std::vector getAllFrontEndProperties(FrontEndProperties &properties) { std::vector allProperties; allProperties.push_back(&properties.disableOverdispatch); + allProperties.push_back(&properties.singleSliceDispatchCcsMode); return allProperties; } @@ -32,8 +33,11 @@ using namespace NEO; TEST(StreamPropertiesTests, whenSettingCooperativeKernelPropertiesThenCorrectValueIsSet) { StreamProperties properties; - for (auto disableOverdispatch : ::testing::Bool()) { - properties.frontEndState.setProperties(false, disableOverdispatch, *defaultHwInfo); - EXPECT_EQ(disableOverdispatch, properties.frontEndState.disableOverdispatch.value); + for (auto isEngineInstanced : ::testing::Bool()) { + for (auto disableOverdispatch : ::testing::Bool()) { + properties.frontEndState.setProperties(false, disableOverdispatch, isEngineInstanced, *defaultHwInfo); + EXPECT_EQ(disableOverdispatch, properties.frontEndState.disableOverdispatch.value); + EXPECT_EQ(isEngineInstanced, properties.frontEndState.singleSliceDispatchCcsMode.value); + } } } diff --git a/shared/test/unit_test/preamble/preamble_tests.cpp b/shared/test/unit_test/preamble/preamble_tests.cpp index 956b49ed45..e2c10d646a 100644 --- a/shared/test/unit_test/preamble/preamble_tests.cpp +++ b/shared/test/unit_test/preamble/preamble_tests.cpp @@ -226,9 +226,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenProgramVFEStateIsCalledThenCorrect auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 1024u, addressToPatch, - 10u, AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 1024u, addressToPatch, 10u, emptyProperties); EXPECT_GE(reinterpret_cast(pVfeCmd), reinterpret_cast(preambleStream.getCpuBase())); EXPECT_LT(reinterpret_cast(pVfeCmd), reinterpret_cast(preambleStream.getCpuBase()) + preambleStream.getUsed()); @@ -251,9 +249,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenGetScratchSpaceAddressOffsetForVfe auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, mockDevice->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, mockDevice->getHardwareInfo(), 1024u, addressToPatch, - 10u, AdditionalKernelExecInfo::NotApplicable, - emptyProperties); + PreambleHelper::programVfeState(pVfeCmd, mockDevice->getHardwareInfo(), 1024u, addressToPatch, 10u, emptyProperties); auto offset = PreambleHelper::getScratchSpaceAddressOffsetForVfeState(&preambleStream, pVfeCmd); EXPECT_NE(0u, offset);