diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests.h b/opencl/test/unit_test/command_stream/compute_mode_tests.h index 2f14c04541..b7b86c57f8 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests.h +++ b/opencl/test/unit_test/command_stream/compute_mode_tests.h @@ -20,6 +20,7 @@ struct ComputeModeRequirements : public ::testing::Test { template struct myCsr : public UltCommandStreamReceiver { using CommandStreamReceiver::commandStream; + using CommandStreamReceiver::streamProperties; using CommandStreamReceiverHw::lastSentThreadArbitrationPolicy; using CommandStreamReceiverHw::requiredThreadArbitrationPolicy; myCsr(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) @@ -31,10 +32,15 @@ struct ComputeModeRequirements : public ::testing::Test { } template - void overrideComputeModeRequest(bool reqestChanged, bool requireCoherency, bool hasSharedHandles, bool modifyThreadArbitrationPolicy = false) { - overrideComputeModeRequest(reqestChanged, requireCoherency, hasSharedHandles, false, 128u); + void overrideComputeModeRequest(bool reqestChanged, bool requireCoherency, bool hasSharedHandles, + bool modifyThreadArbitrationPolicy = false, bool numGrfRequiredChanged = false, + uint32_t numGrfRequired = 128u) { + overrideComputeModeRequest(reqestChanged, requireCoherency, hasSharedHandles, numGrfRequiredChanged, numGrfRequired); if (modifyThreadArbitrationPolicy) { - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; + auto csrHw = getCsrHw(); + csrHw->lastSentThreadArbitrationPolicy = csrHw->requiredThreadArbitrationPolicy; + csrHw->streamProperties.stateComputeMode.threadArbitrationPolicy.value = csrHw->requiredThreadArbitrationPolicy; + csrHw->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty = true; } } @@ -50,6 +56,10 @@ struct ComputeModeRequirements : public ::testing::Test { csrHw->getCsrRequestFlags()->numGrfRequiredChanged = numGrfRequiredChanged; flags.requiresCoherency = requireCoherency; flags.numGrfRequired = numGrfRequired; + csrHw->streamProperties.stateComputeMode.isCoherencyRequired.value = requireCoherency; + csrHw->streamProperties.stateComputeMode.isCoherencyRequired.isDirty = coherencyRequestChanged; + csrHw->streamProperties.stateComputeMode.largeGrfMode.value = (numGrfRequired == GrfConfig::LargeGrfNumber); + csrHw->streamProperties.stateComputeMode.largeGrfMode.isDirty = numGrfRequiredChanged; if (hasSharedHandles) { makeResidentSharedAlloc(); } diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp b/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp index 4f63c0a7bf..7de5dbe8c1 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp +++ b/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp @@ -39,7 +39,7 @@ HWTEST2_F(ComputeModeRequirements, GivenProgramPipeControlPriorToNonPipelinedSta expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -86,7 +86,7 @@ HWTEST2_F(ComputeModeRequirements, GivenMultipleCCSEnabledSetupThenCorrectComman expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, false, true); getCsrHw()->programComputeMode(stream, flags, hwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp index d2fbd80af8..9b418f0ea1 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp @@ -90,7 +90,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenCompute expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -101,7 +101,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenCompute auto startOffset = stream.getUsed(); - overrideComputeModeRequest(true, true, false, false); + overrideComputeModeRequest(true, true, false, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -128,7 +128,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyWithSharedHandlesWhenComputeMod auto expectedPcCmd = FamilyType::cmdInitPipeControl; - overrideComputeModeRequest(true, false, true, false); + overrideComputeModeRequest(true, false, true, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -142,7 +142,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyWithSharedHandlesWhenComputeMod auto startOffset = stream.getUsed(); - overrideComputeModeRequest(true, true, true, false); + overrideComputeModeRequest(true, true, true, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -180,7 +180,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithoutSharedHandles bool foundOne = false; typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; - uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; + uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); @@ -241,7 +241,7 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithSharedHandlesWhe bool foundOne = false; typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; - uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; + uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); @@ -328,7 +328,7 @@ HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfModeCh expectedScmCmd.setLargeGrfMode(true); - overrideComputeModeRequest(false, false, false, true, 256u); + overrideComputeModeRequest(true, false, false, true, 256u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -339,7 +339,7 @@ HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfModeCh auto startOffset = stream.getUsed(); - overrideComputeModeRequest(false, false, false, true, 128u); + overrideComputeModeRequest(true, false, false, true, 128u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -376,7 +376,7 @@ HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRFNum expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(false, false, false, true, 127u); + overrideComputeModeRequest(true, false, false, true, 127u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -402,7 +402,7 @@ HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRFNum getCsrHw()->requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; - overrideComputeModeRequest(false, false, false, true, 256u); + overrideComputeModeRequest(true, false, false, true, 256u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl index 6095ece504..6653738d89 100644 --- a/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl @@ -24,6 +24,7 @@ struct Gen12LpCoherencyRequirements : public ::testing::Test { struct myCsr : public CommandStreamReceiverHw { using CommandStreamReceiver::commandStream; + using CommandStreamReceiver::streamProperties; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0, 1){}; CsrSizeRequestFlags *getCsrRequestFlags() { return &csrSizeRequestFlags; } }; @@ -36,6 +37,8 @@ struct Gen12LpCoherencyRequirements : public ::testing::Test { csr->getCsrRequestFlags()->coherencyRequestChanged = reqestChanged; csr->getCsrRequestFlags()->hasSharedHandles = hasSharedHandles; flags.requiresCoherency = requireCoherency; + csr->streamProperties.stateComputeMode.isCoherencyRequired.value = requireCoherency; + csr->streamProperties.stateComputeMode.isCoherencyRequired.isDirty = reqestChanged; if (hasSharedHandles) { makeResidentSharedAlloc(); } @@ -279,7 +282,7 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithSharedH csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); }; - auto flushTaskAndFindCmds = [&](bool expectCoherent) { + auto flushTaskAndFindCmds = [&](bool expectCoherent, bool valueChanged) { flushTask(expectCoherent); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); @@ -291,8 +294,10 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithSharedH for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { - EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); - EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); + if (valueChanged) { + EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); + } + EXPECT_EQ(valueChanged ? expectedCoherentMask : 0u, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); @@ -302,12 +307,12 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithSharedH EXPECT_TRUE(foundOne); }; - flushTaskAndFindCmds(false); // first time - flushTaskAndFindCmds(false); // not changed - flushTaskAndFindCmds(true); // changed - flushTaskAndFindCmds(true); // not changed - flushTaskAndFindCmds(false); // changed - flushTaskAndFindCmds(false); // not changed + flushTaskAndFindCmds(false, true); // first time + flushTaskAndFindCmds(false, false); // not changed + flushTaskAndFindCmds(true, true); // changed + flushTaskAndFindCmds(true, false); // not changed + flushTaskAndFindCmds(false, true); // changed + flushTaskAndFindCmds(false, false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicsAlloc); } @@ -321,6 +326,7 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenFlushWithoutSharedHandlesWhenPr EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); auto startOffset = csr->commandStream.getUsed(); + csr->streamProperties.stateComputeMode.isCoherencyRequired.set(true); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); diff --git a/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp index 2f84522f39..8801c3b113 100644 --- a/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp @@ -29,7 +29,7 @@ HWTEST2_F(ComputeModeRequirements, GivenProgramPipeControlPriorToNonPipelinedSta expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -72,7 +72,7 @@ HWTEST2_F(ComputeModeRequirements, GivenMultipleCCSEnabledSetupThenCorrectComman expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, false, true); getCsrHw()->programComputeMode(stream, flags, hwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp index 7596fd8617..98c82d6422 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp @@ -42,7 +42,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenNewRequiredThreadArbitrationPolicyWhe getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; getCsrHw()->requiredThreadArbitrationPolicy = newEuThreadSchedulingMode; - overrideComputeModeRequest(false, false, false); + overrideComputeModeRequest(true, false, false, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -172,7 +172,10 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH IndirectHeap stream(graphicAlloc); auto flushTask = [&](bool threadArbitrationPolicyChanged) { - getCsrHw()->lastSentThreadArbitrationPolicy = threadArbitrationPolicyChanged ? ThreadArbitrationPolicy::NotPresent : getCsrHw()->lastSentThreadArbitrationPolicy; + if (threadArbitrationPolicyChanged) { + getCsrHw()->streamProperties.stateComputeMode.threadArbitrationPolicy.value = -1; + getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + } startOffset = getCsrHw()->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); }; @@ -182,13 +185,12 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH hwParser.parseCommands(getCsrHw()->commandStream, startOffset); bool foundOne = false; - uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask; + uint32_t expectedCoherentMask = FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { - - EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); + EXPECT_EQ(expectToBeProgrammed ? expectedCoherentMask : 0u, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); @@ -198,6 +200,9 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH EXPECT_EQ(expectToBeProgrammed, foundOne); }; + getCsrHw()->streamProperties.stateComputeMode.setProperties(flags.requiresCoherency, flags.numGrfRequired, + getCsrHw()->lastSentThreadArbitrationPolicy); + flushTask(true); findCmd(true); // first time @@ -225,7 +230,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenComp expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); - overrideComputeModeRequest(true, false, false, true); + overrideComputeModeRequest(true, false, false, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -234,7 +239,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenComp auto startOffset = stream.getUsed() + sizeof(PIPE_CONTROL); - overrideComputeModeRequest(true, true, false, true); + overrideComputeModeRequest(true, true, false, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -264,7 +269,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenCoherencyWithSharedHandlesWhenCompute auto expectedPcCmd = FamilyType::cmdInitPipeControl; - overrideComputeModeRequest(true, false, true, true); + overrideComputeModeRequest(true, false, true, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -276,7 +281,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenCoherencyWithSharedHandlesWhenCompute auto startOffset = stream.getUsed(); - overrideComputeModeRequest(true, true, true, true); + overrideComputeModeRequest(true, true, true, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -308,7 +313,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfMod expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); - overrideComputeModeRequest(false, false, false, true, 256u); + overrideComputeModeRequest(true, false, false, true, true, 256u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -317,7 +322,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfMod auto startOffset = stream.getUsed() + sizeof(PIPE_CONTROL); - overrideComputeModeRequest(false, false, false, true, 128u); + overrideComputeModeRequest(true, false, false, true, true, 128u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); @@ -347,7 +352,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRF expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); - overrideComputeModeRequest(false, false, false, true, 127u); + overrideComputeModeRequest(true, false, false, true, true, 127u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -371,7 +376,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingThenCorrectComm expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -419,7 +424,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenProgramPipeControlPriorToNonPipelined expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); - overrideComputeModeRequest(true, false, false, false); + overrideComputeModeRequest(true, false, false, true, true); getCsrHw()->programComputeMode(stream, flags, hwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/opencl/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp index f4b59da510..cfcf902535 100644 --- a/opencl/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/compute_mode_tests_xe_hpg_core.cpp @@ -51,6 +51,8 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp DebugManager.flags.ForceZPassAsyncComputeThreadLimit.set(testValue.zPassThreadLimit); DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(testValue.pixelThreadLimit); + pCsr->streamProperties.stateComputeMode = {}; + pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u); LinearStream stream(buff, 1024); pCsr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(sizeof(STATE_COMPUTE_MODE), stream.getUsed()); @@ -67,6 +69,8 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp DebugManager.flags.ForceZPassAsyncComputeThreadLimit.set(-1); DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(-1); + pCsr->streamProperties.stateComputeMode = {}; + pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u); LinearStream stream(buff, 1024); pCsr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(sizeof(STATE_COMPUTE_MODE), stream.getUsed()); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 4ca53b45f0..93872cd8d1 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -258,25 +258,27 @@ CompletionStamp CommandStreamReceiverHw::flushTask( dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode, peekHwInfo()); + if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) { + this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy; + } + if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) { + dispatchFlags.numGrfRequired = lastSentNumGrfRequired; + } + + this->streamProperties.stateComputeMode.setProperties(dispatchFlags.requiresCoherency, dispatchFlags.numGrfRequired, + this->requiredThreadArbitrationPolicy); + csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast(dispatchFlags.requiresCoherency); csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); csrSizeRequestFlags.specialPipelineSelectModeChanged = isSpecialPipelineSelectModeChanged; - if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) { - dispatchFlags.numGrfRequired = lastSentNumGrfRequired; - } - csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired; lastSentNumGrfRequired = dispatchFlags.numGrfRequired; csrSizeRequestFlags.activePartitionsChanged = isProgramActivePartitionConfigRequired(); - if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) { - this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy; - } - auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations(); bool stateBaseAddressDirty = false; diff --git a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl index 09dfb866d5..ec50008a0c 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl @@ -24,19 +24,16 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs())) { PipeControlArgs args(true); - addPipeControlPriorToNonPipelinedStateCommand(stream, args); } - StreamProperties properties{}; - properties.stateComputeMode.setProperties(dispatchFlags.requiresCoherency, dispatchFlags.numGrfRequired, - this->requiredThreadArbitrationPolicy); - EncodeComputeMode::programComputeModeCommand(stream, properties.stateComputeMode, hwInfo); + EncodeComputeMode::programComputeModeCommand(stream, this->streamProperties.stateComputeMode, hwInfo); if (csrSizeRequestFlags.hasSharedHandles) { auto pc = stream.getSpaceForCmd(); *pc = GfxFamily::cmdInitPipeControl; } + programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, false); } }