Revert "Remove programming first SCM from the command list"

This reverts commit 9c06af79ad.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2022-03-26 23:39:40 +01:00
committed by Compute-Runtime-Automation
parent d77a6cbe4b
commit 34a5c6cd28
6 changed files with 20 additions and 172 deletions

View File

@@ -2226,10 +2226,9 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo);
finalStreamState = requiredStreamState;
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo);
containsAnyKernel = true;
return;
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);

View File

@@ -42,8 +42,6 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool isFrontEndStateDirty, uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists);
MOCKABLE_VIRTUAL size_t estimateStateComputeModeCmdSizeForMultipleCommandLists(uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists);
size_t estimateFrontEndCmdSize();
size_t estimatePipelineSelect();
void programPipelineSelect(NEO::LinearStream &commandStream);

View File

@@ -254,7 +254,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirty, numCommandLists, phCommandLists);
linearStreamSizeEstimate += estimateStateComputeModeCmdSizeForMultipleCommandLists(numCommandLists, phCommandLists);
if (gsbaStateDirty) {
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
@@ -382,11 +381,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
if (!isCopyOnlyCommandQueue) {
auto &requiredStreamState = commandList->getRequiredStreamState();
streamProperties.stateComputeMode.setProperties(requiredStreamState.stateComputeMode);
bool programVfe = frontEndStateDirty;
if (isPatchingVfeStateAllowed) {
auto &requiredStreamState = commandList->getRequiredStreamState();
streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState);
programVfe |= streamProperties.frontEndState.isDirty();
}
@@ -395,14 +392,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);
frontEndStateDirty = false;
}
if (streamProperties.stateComputeMode.isDirty()) {
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
child, streamProperties.stateComputeMode, {}, false, hwInfo, csr->isRcs());
}
auto &finalStreamState = commandList->getFinalStreamState();
streamProperties.stateComputeMode.setProperties(finalStreamState.stateComputeMode);
if (isPatchingVfeStateAllowed) {
auto &finalStreamState = commandList->getFinalStreamState();
streamProperties.frontEndState.setProperties(finalStreamState.frontEndState);
}
}
@@ -541,31 +533,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
return estimatedSize;
}
// Estimates how many bytes of compute-mode commands a submission of the given command
// lists needs. The lists are walked in order against a local copy of the CSR stream
// properties: each list's required state is applied first, and the size of one
// compute-mode command is accounted for whenever that makes the tracked state dirty;
// the list's final state is then applied so the next list is compared against it.
//
// numCommandLists - number of handles in phCommandLists
// phCommandLists  - command list handles, in submission order
// returns         - (number of dirty transitions) * (size of a single compute-mode command)
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateStateComputeModeCmdSizeForMultipleCommandLists(
    uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    const size_t scmCmdSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->device->getHwInfo(), false, csr->isRcs());

    // `auto` deduces a value, so the simulation below mutates a local object only.
    auto trackedProperties = csr->getStreamProperties();

    size_t dirtyTransitions = 0;
    for (size_t listIndex = 0; listIndex < numCommandLists; ++listIndex) {
        auto currentList = CommandList::fromHandle(phCommandLists[listIndex]);

        // State the list expects on entry; a dirty result means one command must be programmed.
        trackedProperties.stateComputeMode.setProperties(currentList->getRequiredStreamState().stateComputeMode);
        if (trackedProperties.stateComputeMode.isDirty()) {
            ++dirtyTransitions;
        }

        // State the list leaves behind; becomes the baseline for the next list.
        trackedProperties.stateComputeMode.setProperties(currentList->getFinalStreamState().stateComputeMode);
    }

    return dirtyTransitions * scmCmdSize;
}
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelect() {

View File

@@ -481,6 +481,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
auto event1 = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
events.push_back(event1.get());
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -494,6 +496,12 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
if (MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo)) {
EXPECT_NE(cmdList.end(), itor);

View File

@@ -284,7 +284,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemor
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
EXPECT_EQ(7u, allPcCommands.size());
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) {
@@ -331,7 +331,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMem
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(5u, allPcCommands.size());
EXPECT_EQ(6u, allPcCommands.size());
context->freeMem(srcBuffer);
context->freeMem(dstBuffer);
@@ -374,7 +374,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWit
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(5u, allPcCommands.size());
EXPECT_EQ(6u, allPcCommands.size());
context->freeMem(dstBuffer);
}
@@ -417,7 +417,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWit
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
EXPECT_EQ(7u, allPcCommands.size());
context->freeMem(dstBuffer);
}
@@ -460,7 +460,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(5u, allPcCommands.size());
EXPECT_EQ(6u, allPcCommands.size());
context->freeMem(dstBuffer);
}
@@ -506,7 +506,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(5u, allPcCommands.size());
EXPECT_EQ(6u, allPcCommands.size());
auto cmd = genCmdCast<PIPE_CONTROL *>(*allPcCommands.back());
EXPECT_TRUE(cmd->getDcFlushEnable());
@@ -562,6 +562,9 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
EXPECT_FALSE(cmd->getDcFlushEnable());

View File

@@ -21,7 +21,6 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "test_traits_common.h"
@@ -933,131 +932,5 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
}
}
using CommandQueueExecuteTest = Test<DeviceFixture>;

// For several sequences of command lists, checks that
//  - estimateStateComputeModeCmdSizeForMultipleCommandLists() returns the expected multiple
//    of a single compute-mode command size,
//  - executeCommandLists() emits the same number of STATE_COMPUTE_MODE commands, and
//  - the CSR stream properties hold the expected largeGrfMode value afterwards.
//
// Naming: kernelA uses GrfConfig::DefaultGrfNumber, kernelB uses GrfConfig::LargeGrfNumber;
// a command list named e.g. "BAB" launches kernelB, kernelA, kernelB in that order.
HWTEST2_F(CommandQueueExecuteTest, whenExecuteCommandListsIsCalledThenCorrectSizeOfScmCmdsIsCalculatedAndCorrectStateIsSet, IsAtLeastXeHpCore) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    DebugManagerStateRestore restorer;

    ze_command_queue_desc_t desc = {};
    NEO::CommandStreamReceiver *csr = nullptr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
    ASSERT_NE(nullptr, csr);

    // Released through commandQueue->destroy() at the end of the test.
    auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>{device, csr, &desc};
    commandQueue->initialize(false, false);

    Mock<::L0::Kernel> kernelA;
    auto pMockModule1 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernelA.module = pMockModule1.get();
    kernelA.immutableData.kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;

    Mock<::L0::Kernel> kernelB;
    auto pMockModule2 = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernelB.module = pMockModule2.get();
    kernelB.immutableData.kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;

    ze_group_count_t threadGroupDimensions{1, 1, 1};

    auto commandListA = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListA->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListA->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);

    auto commandListBB = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListBB->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListBB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);
    commandListBB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);

    auto commandListAB = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListAB->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListAB->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);
    commandListAB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);

    auto commandListBA = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListBA->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListBA->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);
    commandListBA->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);

    auto commandListBAB = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListBAB->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListBAB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);
    commandListBAB->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);
    commandListBAB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);

    auto commandListAAB = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListAAB->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandListAAB->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);
    commandListAAB->appendLaunchKernelWithParams(&kernelA, &threadGroupDimensions, nullptr, false, false, false);
    commandListAAB->appendLaunchKernelWithParams(&kernelB, &threadGroupDimensions, nullptr, false, false, false);

    // No kernels appended at all.
    auto commandListEmpty = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandListEmpty->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Counts STATE_COMPUTE_MODE commands written to the queue's command stream starting at
    // streamOffset. Only the bytes between streamOffset and the current used size are parsed;
    // passing the full used size as the length would read streamOffset bytes past the
    // written region.
    auto getScmCommandsCount = [](MockCommandQueueHw<gfxCoreFamily> &queue, size_t streamOffset) -> size_t {
        const auto usedAfter = queue.commandStream->getUsed();
        if (usedAfter < streamOffset) {
            // Guards the subtraction below against wrapping around.
            ADD_FAILURE() << "command stream used size is smaller than the recorded offset";
            return 0u;
        }
        GenCmdList cmdList;
        FamilyType::PARSE::parseCommandBuffer(
            cmdList, ptrOffset(queue.commandStream->getCpuBase(), streamOffset), usedAfter - streamOffset);
        return findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end()).size();
    };

    size_t singleScmCmdSize = NEO::EncodeComputeMode<FamilyType>::getCmdSizeForComputeMode(*defaultHwInfo, false, true);

    // Initial state: largeGrfMode is expected to be -1 before anything was submitted.
    EXPECT_EQ(-1, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // A, AB, BA, A: one command expected for the whole submission.
        ze_command_list_handle_t commandLists[] = {commandListA->toHandle(), commandListAB->toHandle(),
                                                   commandListBA->toHandle(), commandListA->toHandle()};
        EXPECT_EQ(1 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(4, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(4, commandLists, nullptr, false);
        EXPECT_EQ(1u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(0, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // AAB, BAB, A: one command expected.
        ze_command_list_handle_t commandLists[] = {commandListAAB->toHandle(), commandListBAB->toHandle(), commandListA->toHandle()};
        EXPECT_EQ(1 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(3, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(3, commandLists, nullptr, false);
        EXPECT_EQ(1u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(0, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // Empty, A: no command expected.
        ze_command_list_handle_t commandLists[] = {commandListEmpty->toHandle(), commandListA->toHandle()};
        EXPECT_EQ(0 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(2, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(2, commandLists, nullptr, false);
        EXPECT_EQ(0u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(0, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // BB: one command expected; largeGrfMode is expected to become 1.
        ze_command_list_handle_t commandLists[] = {commandListBB->toHandle()};
        EXPECT_EQ(1 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(1, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(1, commandLists, nullptr, false);
        EXPECT_EQ(1u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(1, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // A: one command expected; largeGrfMode is expected to go back to 0.
        ze_command_list_handle_t commandLists[] = {commandListA->toHandle()};
        EXPECT_EQ(1 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(1, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(1, commandLists, nullptr, false);
        EXPECT_EQ(1u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(0, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);
    {
        // AB four times: three commands expected.
        ze_command_list_handle_t commandLists[] = {commandListAB->toHandle(), commandListAB->toHandle(),
                                                   commandListAB->toHandle(), commandListAB->toHandle()};
        EXPECT_EQ(3 * singleScmCmdSize, commandQueue->estimateStateComputeModeCmdSizeForMultipleCommandLists(4, commandLists));
        auto commandsOffset = commandQueue->commandStream->getUsed();
        commandQueue->executeCommandLists(4, commandLists, nullptr, false);
        EXPECT_EQ(3u, getScmCommandsCount(*commandQueue, commandsOffset));
    }
    EXPECT_EQ(1, csr->getStreamProperties().stateComputeMode.largeGrfMode.value);

    commandQueue->destroy();
}
} // namespace ult
} // namespace L0