mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
fix: don't program dummy blit when no copy command was programmed
Related-To: NEO-9996, NEO-10571
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6ac90f76c5
commit
a22cefdaed
@@ -191,13 +191,4 @@ void CommandList::synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t
|
||||
event->hostSynchronize(std::numeric_limits<uint64_t>::max());
|
||||
}
|
||||
}
|
||||
|
||||
void CommandList::makeResidentDummyAllocation() {
|
||||
if (isCopyOnly()) {
|
||||
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
auto dummyAllocation = rootDeviceEnvironment.getDummyAllocation();
|
||||
commandContainer.addToResidencyContainer(dummyAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -354,7 +354,6 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool getDcFlushRequired(bool externalCondition) const {
|
||||
return externalCondition ? dcFlushSupport : false;
|
||||
}
|
||||
void makeResidentDummyAllocation();
|
||||
MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
|
||||
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
|
||||
@@ -345,6 +345,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void addCmdForPatching(std::shared_ptr<NEO::InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
|
||||
uint64_t getInOrderIncrementValue() const;
|
||||
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
|
||||
void encodeMiFlush(uint64_t immediateDataGpuAddress, uint64_t immediateData, NEO::MiFlushArgs &args);
|
||||
|
||||
NEO::InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
|
||||
|
||||
|
||||
@@ -1224,7 +1224,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
|
||||
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
|
||||
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *this->dummyBlitWa.rootDeviceEnvironment);
|
||||
makeResidentDummyAllocation();
|
||||
dummyBlitWa.isWaRequired = true;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1274,7 +1274,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
} else {
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), rootDeviceEnvironment);
|
||||
}
|
||||
makeResidentDummyAllocation();
|
||||
dummyBlitWa.isWaRequired = true;
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -1303,7 +1303,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *dummyBlitWa.rootDeviceEnvironment);
|
||||
makeResidentDummyAllocation();
|
||||
dummyBlitWa.isWaRequired = true;
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -2142,7 +2142,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
*commandContainer.getCommandStream(),
|
||||
size,
|
||||
neoDevice->getRootDeviceEnvironmentRef());
|
||||
makeResidentDummyAllocation();
|
||||
dummyBlitWa.isWaRequired = true;
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
|
||||
@@ -2183,8 +2183,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
} else {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
|
||||
makeResidentDummyAllocation();
|
||||
encodeMiFlush(0, 0, args);
|
||||
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false);
|
||||
}
|
||||
appendWriteKernelTimestamp(event, nullptr, beforeWalker, false, false);
|
||||
@@ -2480,7 +2479,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
if (dcFlushRequired) {
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
|
||||
encodeMiFlush(0, 0, args);
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
@@ -2520,10 +2519,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
if (isImmediateType() && isCopyOnly() && trackDependencies) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.commandWithPostSync = true;
|
||||
args.waArgs.isWaRequired = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), this->csr->getBarrierCountGpuAddress(), this->csr->getNextBarrierCount() + 1, args);
|
||||
encodeMiFlush(this->csr->getBarrierCountGpuAddress(), this->csr->getNextBarrierCount() + 1, args);
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
makeResidentDummyAllocation();
|
||||
}
|
||||
|
||||
if (apiRequest) {
|
||||
@@ -2728,12 +2725,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.timeStampOperation = true;
|
||||
args.commandWithPostSync = true;
|
||||
args.waArgs.isWaRequired = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(),
|
||||
allocationStruct.alignedAllocationPtr,
|
||||
0,
|
||||
args);
|
||||
makeResidentDummyAllocation();
|
||||
encodeMiFlush(allocationStruct.alignedAllocationPtr, 0, args);
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.blockSettingPostSyncProperties = true;
|
||||
@@ -3277,14 +3269,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
TaskCountType value = 0u;
|
||||
if (isImmediateType()) {
|
||||
args.commandWithPostSync = true;
|
||||
args.waArgs.isWaRequired = true;
|
||||
gpuAddress = this->csr->getBarrierCountGpuAddress();
|
||||
value = this->csr->getNextBarrierCount() + 1;
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), gpuAddress, value, args);
|
||||
makeResidentDummyAllocation();
|
||||
encodeMiFlush(gpuAddress, value, args);
|
||||
} else {
|
||||
appendComputeBarrierCommand();
|
||||
}
|
||||
@@ -3489,10 +3479,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.commandWithPostSync = true;
|
||||
args.waArgs.isWaRequired = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), gpuAddress,
|
||||
data, args);
|
||||
makeResidentDummyAllocation();
|
||||
encodeMiFlush(gpuAddress,
|
||||
data, args);
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope);
|
||||
@@ -3564,14 +3552,8 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy(uint64_t gpuAddr
|
||||
|
||||
NEO::MiFlushArgs miFlushArgs{this->dummyBlitWa};
|
||||
miFlushArgs.commandWithPostSync = true;
|
||||
miFlushArgs.waArgs.isWaRequired = true;
|
||||
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(
|
||||
*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
value,
|
||||
miFlushArgs);
|
||||
makeResidentDummyAllocation();
|
||||
encodeMiFlush(gpuAddress, value, miFlushArgs);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -3780,4 +3762,14 @@ uint64_t CommandListCoreFamily<gfxCoreFamily>::getInOrderIncrementValue() const
|
||||
return (this->inOrderAtomicSignalingEnabled ? this->getPartitionCount() : 1);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::encodeMiFlush(uint64_t immediateDataGpuAddress, uint64_t immediateData, NEO::MiFlushArgs &args) {
|
||||
auto isDummyBlitRequired = NEO::BlitCommandsHelper<GfxFamily>::isDummyBlitWaNeeded(args.waArgs);
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), immediateDataGpuAddress, immediateData, args);
|
||||
if (isDummyBlitRequired) {
|
||||
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
auto dummyAllocation = rootDeviceEnvironment.getDummyAllocation();
|
||||
commandContainer.addToResidencyContainer(dummyAllocation);
|
||||
}
|
||||
}
|
||||
} // namespace L0
|
||||
|
||||
@@ -540,6 +540,8 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::dcFlushSupport;
|
||||
using BaseClass::device;
|
||||
using BaseClass::dummyBlitWa;
|
||||
using BaseClass::encodeMiFlush;
|
||||
using BaseClass::ownedPrivateAllocations;
|
||||
|
||||
ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS,
|
||||
@@ -632,6 +634,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
|
||||
using BaseClass::copyThroughLockedPtrEnabled;
|
||||
using BaseClass::dcFlushSupport;
|
||||
using BaseClass::dependenciesPresent;
|
||||
using BaseClass::dummyBlitWa;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
|
||||
@@ -3024,6 +3024,56 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
|
||||
EXPECT_EQ(Builtin::fillBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless));
|
||||
EXPECT_EQ(Builtin::fillBufferRightLeftoverStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferRightLeftover>(isStateless, isHeapless));
|
||||
}
|
||||
// Verifies encodeMiFlush() on a copy command list with ForceDummyBlitWa set to 1
// and dummyBlitWa.isWaRequired set to true:
//  - MI_FLUSH_DW is present in the stream but is not the first command,
//  - the first command passes UnitTestHelper::verifyDummyBlitWa,
//  - the dummy allocation (absent before the call) exists afterwards and is the
//    only entry in the residency container.
HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushThenDummyBlitIsProgrammedPriorToMiFlushAndDummyAllocationIsAddedToResidencyContainer, IsAtLeastXeHpCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    DebugManagerStateRestore restorer;
    debugManager.flags.ForceDummyBlitWa.set(1);
    MockCommandListCoreFamily<gfxCoreFamily> cmdlist;
    cmdlist.initialize(device, NEO::EngineGroupType::copy, 0u);
    cmdlist.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
    auto &commandContainer = cmdlist.getCmdContainer();
    cmdlist.dummyBlitWa.isWaRequired = true;
    MiFlushArgs args{cmdlist.dummyBlitWa};
    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
    // Start from an empty residency container so the final size check only
    // reflects what encodeMiFlush added.
    commandContainer.getResidencyContainer().clear();
    EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation());
    cmdlist.encodeMiFlush(0, 0, args);
    GenCmdList programmedCommands;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        programmedCommands, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<MI_FLUSH_DW *>(programmedCommands.begin(), programmedCommands.end());
    EXPECT_NE(programmedCommands.begin(), itor);
    EXPECT_NE(programmedCommands.end(), itor);
    auto firstCommand = programmedCommands.begin();
    UnitTestHelper<FamilyType>::verifyDummyBlitWa(&rootDeviceEnvironment, firstCommand);
    EXPECT_NE(nullptr, rootDeviceEnvironment.getDummyAllocation());
    // Fatal assertion: element [0] is read right below, so the test must stop
    // here instead of indexing a container of unexpected size.
    ASSERT_EQ(commandContainer.getResidencyContainer().size(), 1u);
    EXPECT_EQ(commandContainer.getResidencyContainer()[0], rootDeviceEnvironment.getDummyAllocation());
}
|
||||
|
||||
// Verifies encodeMiFlush() on a copy command list with ForceDummyBlitWa set to 0:
// even though dummyBlitWa.isWaRequired is true and the dummy allocation has been
// created up front, MI_FLUSH_DW is the very first command in the stream and
// nothing is added to the residency container.
HWTEST2_F(CommandListCreate, givenDummyBlitNotRequiredWhenEncodeMiFlushThenDummyBlitIsNotProgrammedAndDummyAllocationIsNotAddedToResidencyContainer, IsAtLeastXeHpCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;
    debugManager.flags.ForceDummyBlitWa.set(0);

    MockCommandListCoreFamily<gfxCoreFamily> cmdlist;
    cmdlist.initialize(device, NEO::EngineGroupType::copy, 0u);
    cmdlist.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
    auto &container = cmdlist.getCmdContainer();

    cmdlist.dummyBlitWa.isWaRequired = true;
    MiFlushArgs flushArgs{cmdlist.dummyBlitWa};

    // Create the dummy allocation before the call under test.
    auto &rootDeviceEnv = device->getNEODevice()->getRootDeviceEnvironmentRef();
    rootDeviceEnv.initDummyAllocation();
    EXPECT_NE(nullptr, rootDeviceEnv.getDummyAllocation());

    container.getResidencyContainer().clear();
    cmdlist.encodeMiFlush(0, 0, flushArgs);

    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        parsedCmds, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    // MI_FLUSH_DW must exist and must be the first parsed command.
    auto miFlushIt = find<MI_FLUSH_DW *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(parsedCmds.begin(), miFlushIt);
    EXPECT_NE(parsedCmds.end(), miFlushIt);

    EXPECT_EQ(container.getResidencyContainer().size(), 0u);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -464,7 +464,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
EXPECT_EQ(2u, event->getPacketsInUse());
|
||||
size_t usedAfterSize = cmdStream->getUsed();
|
||||
|
||||
NEO::EncodeDummyBlitWaArgs waArgs{true, const_cast<RootDeviceEnvironment *>(&device->getNEODevice()->getRootDeviceEnvironment())};
|
||||
NEO::EncodeDummyBlitWaArgs waArgs{false, const_cast<RootDeviceEnvironment *>(&device->getNEODevice()->getRootDeviceEnvironment())};
|
||||
size_t expectedSize = NEO::EncodeMiFlushDW<FamilyType>::getCommandSizeWithWa(waArgs) * packets;
|
||||
EXPECT_EQ(expectedSize, (usedAfterSize - usedBeforeSize));
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
@@ -835,5 +836,178 @@ HWTEST2_F(AppendMemoryCopy,
|
||||
false);
|
||||
}
|
||||
|
||||
// Verifies appendMemoryCopyRegion() on an immediate copy command list with
// ForceDummyBlitWa set to 1:
//  - one XY_COPY_BLT is programmed per slice, with the expected base addresses
//    and pitches,
//  - no XY_COLOR_BLT (and, on PVC, no MEM_SET) is found ahead of the last
//    XY_COPY_BLT,
//  - dummyBlitWa.isWaRequired is true afterwards.
HWTEST2_F(AppendMemoryCopy, givenCopyCommandListImmediateWithDummyBlitWaWhenCopyMemoryRegionThenDummyBlitIsNotProgrammedButIsRequiredForNextFlushProgramming, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
    using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT;

    DebugManagerStateRestore restorer;

    NEO::debugManager.flags.ForceDummyBlitWa.set(1);

    MockCommandListImmediateHw<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::copy, 0u);
    cmdList.isFlushTaskSubmissionEnabled = true;
    cmdList.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;

    constexpr size_t allocSize = 4096;
    void *buffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &buffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    uint32_t offset = 64u;
    void *srcPtr = ptrOffset(buffer, offset);
    void *dstPtr = buffer;
    const auto numSlices = 32u;
    ze_copy_region_t dstRegion = {0, 0, 0, 8, 4, numSlices};
    ze_copy_region_t srcRegion = {0, 0, 0, 8, 4, numSlices};

    constexpr auto dstPitch = 32u;
    constexpr auto dstSlicePitch = 1024u;
    constexpr auto srcPitch = 64u;
    constexpr auto srcSlicePitch = 2048u;
    cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, dstPitch, dstSlicePitch, srcPtr, &srcRegion, srcPitch, srcSlicePitch, nullptr, 0, nullptr, false, false);

    auto &cmdContainer = cmdList.getCmdContainer();
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        genCmdList, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()));
    auto itors = findAll<XY_COPY_BLT *>(genCmdList.begin(), genCmdList.end());

    // Fatal assertion: the loop indexes itors[0..numSlices) and itors.back() is
    // read afterwards, so a count mismatch must stop the test here.
    ASSERT_EQ(numSlices, itors.size());
    for (auto i = 0u; i < numSlices; i++) {

        auto itor = itors[i];
        ASSERT_NE(genCmdList.end(), itor);

        auto bltCmd = genCmdCast<XY_COPY_BLT *>(*itor);
        EXPECT_EQ(bltCmd->getSourceBaseAddress(), reinterpret_cast<uintptr_t>(ptrOffset(srcPtr, srcSlicePitch * i)));
        EXPECT_EQ(bltCmd->getSourcePitch(), srcPitch);
        EXPECT_EQ(bltCmd->getDestinationBaseAddress(), reinterpret_cast<uintptr_t>(ptrOffset(dstPtr, dstSlicePitch * i)));
        EXPECT_EQ(bltCmd->getDestinationPitch(), dstPitch);
    }

    // find() returning the end of the searched range [begin, last copy blit)
    // means the searched command type does not occur before the last copy blit.
    if constexpr (IsPVC::isMatched<productFamily>()) {
        EXPECT_EQ(itors.back(), find<typename GfxFamily::MEM_SET *>(genCmdList.begin(), itors.back()));
    }
    EXPECT_EQ(itors.back(), find<XY_COLOR_BLT *>(genCmdList.begin(), itors.back()));

    EXPECT_TRUE(cmdList.dummyBlitWa.isWaRequired);

    context->freeMem(buffer);
}
|
||||
|
||||
// Verifies appendMemoryCopyRegion() on a MockCommandListCoreFamily copy command
// list with ForceDummyBlitWa set to 1:
//  - one XY_COPY_BLT is programmed per slice, with the expected base addresses
//    and pitches,
//  - no XY_COLOR_BLT (and, on PVC, no MEM_SET) is found ahead of the last
//    XY_COPY_BLT,
//  - dummyBlitWa.isWaRequired is true afterwards.
HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWithDummyBlitWaWhenCopyMemoryRegionThenDummyBlitIsNotProgrammedButIsRequiredForNextFlushProgramming, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
    using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT;

    DebugManagerStateRestore restorer;

    NEO::debugManager.flags.ForceDummyBlitWa.set(1);

    MockCommandListCoreFamily<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::copy, 0u);
    cmdList.isFlushTaskSubmissionEnabled = true;
    cmdList.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;

    constexpr size_t allocSize = 4096;
    void *buffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &buffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    uint32_t offset = 64u;
    void *srcPtr = ptrOffset(buffer, offset);
    void *dstPtr = buffer;
    const auto numSlices = 32u;
    ze_copy_region_t dstRegion = {0, 0, 0, 8, 4, numSlices};
    ze_copy_region_t srcRegion = {0, 0, 0, 8, 4, numSlices};

    constexpr auto dstPitch = 32u;
    constexpr auto dstSlicePitch = 1024u;
    constexpr auto srcPitch = 64u;
    constexpr auto srcSlicePitch = 2048u;
    cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, dstPitch, dstSlicePitch, srcPtr, &srcRegion, srcPitch, srcSlicePitch, nullptr, 0, nullptr, false, false);

    auto &cmdContainer = cmdList.getCmdContainer();
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        genCmdList, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()));
    auto itors = findAll<XY_COPY_BLT *>(genCmdList.begin(), genCmdList.end());

    // Fatal assertion: the loop indexes itors[0..numSlices) and itors.back() is
    // read afterwards, so a count mismatch must stop the test here.
    ASSERT_EQ(numSlices, itors.size());
    for (auto i = 0u; i < numSlices; i++) {

        auto itor = itors[i];
        ASSERT_NE(genCmdList.end(), itor);

        auto bltCmd = genCmdCast<XY_COPY_BLT *>(*itor);
        EXPECT_EQ(bltCmd->getSourceBaseAddress(), reinterpret_cast<uintptr_t>(ptrOffset(srcPtr, srcSlicePitch * i)));
        EXPECT_EQ(bltCmd->getSourcePitch(), srcPitch);
        EXPECT_EQ(bltCmd->getDestinationBaseAddress(), reinterpret_cast<uintptr_t>(ptrOffset(dstPtr, dstSlicePitch * i)));
        EXPECT_EQ(bltCmd->getDestinationPitch(), dstPitch);
    }

    // find() returning the end of the searched range [begin, last copy blit)
    // means the searched command type does not occur before the last copy blit.
    if constexpr (IsPVC::isMatched<productFamily>()) {
        EXPECT_EQ(itors.back(), find<typename GfxFamily::MEM_SET *>(genCmdList.begin(), itors.back()));
    }
    EXPECT_EQ(itors.back(), find<XY_COLOR_BLT *>(genCmdList.begin(), itors.back()));

    EXPECT_TRUE(cmdList.dummyBlitWa.isWaRequired);

    context->freeMem(buffer);
}
|
||||
|
||||
// Verifies appendMemoryCopy() of a single byte on an immediate copy command list
// with ForceDummyBlitWa set to 1:
//  - exactly one XY_COPY_BLT is programmed, using the given source and
//    destination addresses,
//  - no XY_COLOR_BLT (and, on PVC, no MEM_SET) appears anywhere in the stream,
//  - dummyBlitWa.isWaRequired is true afterwards.
HWTEST2_F(AppendMemoryCopy, givenCopyCommandListImmediateWithDummyBlitWaWhenCopyMemoryThenDummyBlitIsNotProgrammedButIsRequiredForNextFlushProgramming, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
    using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT;

    DebugManagerStateRestore restorer;

    NEO::debugManager.flags.ForceDummyBlitWa.set(1);

    MockCommandListImmediateHw<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::copy, 0u);
    cmdList.isFlushTaskSubmissionEnabled = true;
    cmdList.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;

    constexpr size_t allocSize = 4096;
    void *buffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &buffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    uint32_t offset = 64u;
    void *srcPtr = ptrOffset(buffer, offset);
    void *dstPtr = buffer;
    constexpr auto size = 1;
    cmdList.appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false, false);

    auto &cmdContainer = cmdList.getCmdContainer();
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        genCmdList, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()));
    auto itors = findAll<XY_COPY_BLT *>(genCmdList.begin(), genCmdList.end());

    // Fatal assertion: itors[0] is read right below, so the test must stop here
    // when no (or more than one) copy blit was found.
    ASSERT_EQ(1u, itors.size());
    auto itor = itors[0];
    ASSERT_NE(genCmdList.end(), itor);

    auto bltCmd = genCmdCast<XY_COPY_BLT *>(*itor);
    EXPECT_EQ(bltCmd->getSourceBaseAddress(), reinterpret_cast<uintptr_t>(srcPtr));
    EXPECT_EQ(bltCmd->getDestinationBaseAddress(), reinterpret_cast<uintptr_t>(dstPtr));

    // find() returning end() over the whole list means the command type is absent.
    if constexpr (IsPVC::isMatched<productFamily>()) {
        EXPECT_EQ(genCmdList.end(), find<typename GfxFamily::MEM_SET *>(genCmdList.begin(), genCmdList.end()));
    }
    EXPECT_EQ(genCmdList.end(), find<XY_COLOR_BLT *>(genCmdList.begin(), genCmdList.end()));

    EXPECT_TRUE(cmdList.dummyBlitWa.isWaRequired);

    context->freeMem(buffer);
}
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -4827,10 +4827,6 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
|
||||
itor = ++semaphoreItor;
|
||||
}
|
||||
|
||||
if (device->getProductHelper().isDummyBlitWaRequired()) {
|
||||
UnitTestHelper<FamilyType>::verifyDummyBlitWa(&device->getNEODevice()->getRootDeviceEnvironment(), itor);
|
||||
}
|
||||
|
||||
ASSERT_NE(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // marker event
|
||||
|
||||
auto implicitCounterSdi = genCmdCast<MI_STORE_DATA_IMM *>(*(++itor));
|
||||
|
||||
Reference in New Issue
Block a user