mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Fix optimize timestamp packet dependiencies
-program barrier after global fence allocation is programmed -do not double barrier timestamp in blit enqueue -flush GPGPU while submitting to BCS when barrier requested Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
45d23868de
commit
9ff1307b4b
@ -1,12 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/context_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
@ -85,4 +90,37 @@ static const cl_command_queue_properties DefaultCommandQueueProperties[] = {
|
||||
0,
|
||||
CL_QUEUE_PROFILING_ENABLE,
|
||||
};
|
||||
|
||||
template <bool ooq>
|
||||
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
|
||||
using ContextFixture::SetUp;
|
||||
|
||||
void SetUp() override {
|
||||
hwInfo = *::defaultHwInfo;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
|
||||
|
||||
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
ClDeviceFixture::SetUpImpl(&hwInfo);
|
||||
cl_device_id device = pClDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
cl_command_queue_properties queueProperties = ooq ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
|
||||
CommandQueueHwFixture::SetUp(pClDevice, queueProperties);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
CommandQueueHwFixture::TearDown();
|
||||
ContextFixture::TearDown();
|
||||
ClDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
HardwareInfo hwInfo{};
|
||||
DebugManagerStateRestore state{};
|
||||
};
|
||||
|
||||
using IoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<false>;
|
||||
using OoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<true>;
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -15,16 +15,12 @@
|
||||
#include "shared/test/common/mocks/mock_os_library.h"
|
||||
#include "shared/test/common/mocks/mock_source_level_debugger.h"
|
||||
#include "shared/test/common/test_macros/matchers.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/context_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/image_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
@ -1501,38 +1497,6 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode);
|
||||
}
|
||||
|
||||
template <bool ooq>
|
||||
struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test {
|
||||
using ContextFixture::SetUp;
|
||||
|
||||
void SetUp() override {
|
||||
hwInfo = *::defaultHwInfo;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
|
||||
|
||||
DebugManager.flags.EnableBlitterOperationsSupport.set(1);
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
ClDeviceFixture::SetUpImpl(&hwInfo);
|
||||
cl_device_id device = pClDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
cl_command_queue_properties queueProperties = ooq ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
|
||||
CommandQueueHwFixture::SetUp(pClDevice, queueProperties);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
CommandQueueHwFixture::TearDown();
|
||||
ContextFixture::TearDown();
|
||||
ClDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
HardwareInfo hwInfo{};
|
||||
DebugManagerStateRestore state{};
|
||||
};
|
||||
|
||||
using IoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<false>;
|
||||
using OoqCommandQueueHwBlitTest = CommandQueueHwBlitTest<true>;
|
||||
|
||||
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
@ -1648,6 +1612,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandTh
|
||||
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
|
||||
|
||||
if (pCmdQ->getTimestampPacketContainer() == nullptr) {
|
||||
GTEST_SKIP();
|
||||
@ -1702,6 +1667,10 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandT
|
||||
const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(bcsHwParser.cmdList.end(), find<PIPE_CONTROL *>(semaphoreItor, bcsHwParser.cmdList.end()));
|
||||
|
||||
// Only one barrier semaphore from first BCS enqueue
|
||||
const auto blitItor = find<XY_COPY_BLT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||
EXPECT_EQ(1u, findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), blitItor).size());
|
||||
}
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
|
||||
|
@ -1,19 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@ -460,3 +465,42 @@ HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelec
|
||||
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(OoqCommandQueueHwBlitTest, givenBarrierBeforeFirstKernelWhenEnqueueNDRangeThenProgramBarrierBeforeGlobalAllocation, IsPVC) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
|
||||
if (pCmdQ->getTimestampPacketContainer() == nullptr) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
DebugManagerStateRestore restore{};
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.ForceCacheFlushForBcs.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
|
||||
MockKernel *kernel = mockKernelWithInternals.mockKernel;
|
||||
size_t offset = 0;
|
||||
size_t gws = 1;
|
||||
BufferDefaults::context = context;
|
||||
auto buffer = clUniquePtr(BufferHelper<>::create());
|
||||
char ptr[1] = {};
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
|
||||
auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
HardwareParse ccsHwParser;
|
||||
ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);
|
||||
|
||||
const auto memFenceStateItor = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
|
||||
const auto memFenceItor = find<MI_MEM_FENCE *>(memFenceStateItor, ccsHwParser.cmdList.end());
|
||||
EXPECT_NE(ccsHwParser.cmdList.end(), memFenceItor);
|
||||
EXPECT_NE(ccsHwParser.cmdList.end(), memFenceStateItor);
|
||||
}
|
Reference in New Issue
Block a user