From 49b7c0bec458db0358b93105915c4d6011084c15 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Tue, 9 May 2023 14:17:01 +0000 Subject: [PATCH] performance: Adjust BCS split masks Signed-off-by: Lukasz Jobczyk --- level_zero/core/source/device/bcs_split.h | 4 +- .../xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp | 40 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index ee9ddd4a3b..72e7f56e3a 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -56,8 +56,8 @@ struct BcsSplit { std::vector h2dCmdQs; std::vector d2hCmdQs; - inline static constexpr size_t h2dEngineMask = 0b010001000; - inline static constexpr size_t d2hEngineMask = 0b000100010; + inline static constexpr size_t h2dEngineMask = 0b000001010; + inline static constexpr size_t d2hEngineMask = 0b010100000; NEO::BcsInfoMask engines = NEO::EngineHelpers::oddLinkedCopyEnginesMask; NEO::BcsInfoMask h2dEngines = h2dEngineMask; NEO::BcsInfoMask d2hEngines = d2hEngineMask; diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp index d61b9f3462..78cf182f08 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp @@ -631,10 +631,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 1u); + 
EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u); EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); context->freeMem(srcPtr); @@ -696,10 +696,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u); EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); context->freeMem(srcPtr); @@ -755,10 +755,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenRelaxedOrderingNotAllowedWhenDispatchS EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); 
EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u); EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); uint32_t semaphoresFound = 0; @@ -820,10 +820,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); context->freeMem(srcPtr); context->freeMem(dstPtr); @@ -872,10 +872,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, 
nullptr, 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); context->freeMem(srcPtr); context->freeMem(dstPtr); @@ -923,10 +923,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); context->freeMem(srcPtr); context->freeMem(dstPtr); @@ -987,10 +987,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + 
EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandList0->getCmdContainer().getCommandStream()->getCpuBase(), commandList0->getCmdContainer().getCommandStream()->getUsed())); @@ -1050,10 +1050,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandList0->getCmdContainer().getCommandStream()->getCpuBase(), commandList0->getCmdContainer().getCommandStream()->getUsed())); @@ -1124,10 +1124,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - 
EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandList0->getCmdContainer().getCommandStream()->getCpuBase(), commandList0->getCmdContainer().getCommandStream()->getUsed())); @@ -1314,10 +1314,10 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe size, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); - EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); context->freeMem(srcPtr); context->freeMem(dstPtr);