fix: pass heap information when kernel operation is flushed

Related-To: NEO-8281

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2023-08-25 16:16:51 +00:00 committed by Compute-Runtime-Automation
parent ae0a6b8ea8
commit 0fc45fef1a
6 changed files with 415 additions and 132 deletions

View File

@ -113,81 +113,85 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
bool sbaDirty = this->csr->getGSBAStateDirty();
void *sshCpuPointer = nullptr;
NEO::IndirectHeap *dsh = nullptr;
NEO::IndirectHeap *ssh = nullptr;
if (kernelOperation) {
NEO::IndirectHeap *dsh = nullptr;
NEO::IndirectHeap *ssh = nullptr;
NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT);
this->csr->makeResident(*ioh->getGraphicsAllocation());
if (sbaDirty) {
this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages());
}
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
ssh = this->csr->getGlobalStatelessHeap();
this->csr->makeResident(*ssh->getGraphicsAllocation());
if (sbaDirty) {
this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT);
this->csr->makeResident(*ioh->getGraphicsAllocation());
if (this->requiredStreamState.stateBaseAddress.indirectObjectBaseAddress.value == NEO::StreamProperty64::initValue) {
this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages());
}
} else if (this->immediateCmdListHeapSharing) {
ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;
if (ssh->getGraphicsAllocation()) {
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
ssh = this->csr->getGlobalStatelessHeap();
this->csr->makeResident(*ssh->getGraphicsAllocation());
if (this->requiredStreamState.stateBaseAddress.surfaceStateBaseAddress.value == NEO::StreamProperty64::initValue) {
this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
}
} else if (this->immediateCmdListHeapSharing) {
ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;
if (ssh->getGraphicsAllocation()) {
this->csr->makeResident(*ssh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
}
if (this->dynamicHeapRequired) {
dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation;
if (dsh->getGraphicsAllocation()) {
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
}
if (this->dynamicHeapRequired) {
dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation;
if (dsh->getGraphicsAllocation()) {
this->csr->makeResident(*dsh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages());
}
}
} else {
if (this->dynamicHeapRequired) {
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
this->csr->makeResident(*dsh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages());
}
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
this->csr->makeResident(*ssh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
}
} else {
if (this->dynamicHeapRequired) {
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
this->csr->makeResident(*dsh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages());
sshCpuPointer = ssh->getCpuBase();
if (this->device->getL0Debugger()) {
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
}
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
this->csr->makeResident(*ssh->getGraphicsAllocation());
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
}
void *sshCpuPointer = ssh->getCpuBase();
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger()) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto &sshState = csrHw->getSshState();
bool sshDirty = sshState.updateAndCheck(ssh);
if (this->device->getL0Debugger()) {
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
}
if (sshDirty) {
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger()) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto &sshState = csrHw->getSshState();
bool sshDirty = sshState.updateAndCheck(ssh);
if (sshDirty) {
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = this->device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = this->device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = false;
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = this->device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = this->device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = false;
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
}
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
}
NEO::ImmediateDispatchFlags dispatchFlags{
@ -197,7 +201,6 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
hasStallingCmds // hasStallingCmds
};
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
CommandListImp::storeReferenceTsToMappedEvents(true);
return this->csr->flushImmediateTask(
@ -245,58 +248,60 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
false // isDcFlushRequiredOnStallingCommandsOnNextFlush
);
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT));
NEO::IndirectHeap *dsh = nullptr;
NEO::IndirectHeap *ssh = nullptr;
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
ssh = this->csr->getGlobalStatelessHeap();
} else if (this->immediateCmdListHeapSharing) {
auto &sshReserveConfig = this->commandContainer.getSurfaceStateHeapReserve();
if (sshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) {
ssh = sshReserveConfig.indirectHeapReservation;
if (kernelOperation) {
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
ssh = this->csr->getGlobalStatelessHeap();
} else if (this->immediateCmdListHeapSharing) {
auto &sshReserveConfig = this->commandContainer.getSurfaceStateHeapReserve();
if (sshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) {
ssh = sshReserveConfig.indirectHeapReservation;
}
auto &dshReserveConfig = this->commandContainer.getDynamicStateHeapReserve();
if (this->dynamicHeapRequired && dshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) {
dsh = dshReserveConfig.indirectHeapReservation;
}
} else {
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
}
auto &dshReserveConfig = this->commandContainer.getDynamicStateHeapReserve();
if (this->dynamicHeapRequired && dshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) {
dsh = dshReserveConfig.indirectHeapReservation;
if (this->device->getL0Debugger()) {
UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage));
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
}
} else {
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
}
if (this->device->getL0Debugger()) {
UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage));
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
}
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto sshStateCopy = csrHw->getSshState();
bool sshDirty = sshStateCopy.updateAndCheck(ssh);
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto sshStateCopy = csrHw->getSshState();
bool sshDirty = sshStateCopy.updateAndCheck(ssh);
if (sshDirty) {
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
if (sshDirty) {
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = this->device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = this->device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = false;
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = this->device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = this->device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = false;
args.areMultipleSubDevicesInContext = false;
args.isDebuggerActive = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
}
}

View File

@ -296,6 +296,64 @@ void ImmediateCmdListSharedHeapsFixture::tearDown() {
ModuleMutableCommandListFixture::tearDown();
}
// Records the requested flush-task mode, forwards it to the UseImmediateFlushTask
// debug flag and only then runs the shared-heaps fixture setup.
void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::setUp(int32_t useImmediateFlushTask) {
    // The debug flag is written before the base fixture's setUp() is invoked.
    DebugManager.flags.UseImmediateFlushTask.set(useImmediateFlushTask);
    this->useImmediateFlushTask = useImmediateFlushTask;

    ImmediateCmdListSharedHeapsFixture::setUp();
}
// Appends one non-kernel command of the requested kind to currentCmdList and
// expects the append call to return ZE_RESULT_SUCCESS.
//
// currentCmdList - command list that receives the command.
// operation      - which append API to exercise; event based operations use the
//                  fixture's `event` member.
//
// An operation value that is not handled below appends nothing.
void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::appendNonKernelOperation(L0::ult::CommandList *currentCmdList, NonKernelOperation operation) {
    // Initialized so the single check at the end is well defined for every path.
    ze_result_t result = ZE_RESULT_SUCCESS;

    switch (operation) {
    case NonKernelOperation::Barrier:
        result = currentCmdList->appendBarrier(nullptr, 0, nullptr, false);
        break;
    case NonKernelOperation::SignalEvent:
        result = currentCmdList->appendSignalEvent(event->toHandle());
        break;
    case NonKernelOperation::ResetEvent:
        result = currentCmdList->appendEventReset(event->toHandle());
        break;
    case NonKernelOperation::WaitOnEvents: {
        auto eventHandle = event->toHandle();
        result = currentCmdList->appendWaitOnEvents(1, &eventHandle, false, false, false);
        break;
    }
    case NonKernelOperation::WriteGlobalTimestamp: {
        // Fixed fake destination address; it is only handed to the append call here.
        uint64_t timestampAddress = 0xfffffffffff0L;
        uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);
        result = currentCmdList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr);
        break;
    }
    case NonKernelOperation::MemoryRangesBarrier: {
        uint8_t dstPtr[64] = {};
        driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
        size_t rangeSizes = 1;
        // The API takes an array of range base pointers. Build a real one-element
        // array holding the buffer address instead of reinterpreting the buffer's
        // own (zeroed, byte-aligned) storage as that array.
        const void *ranges[] = {dstPtr};
        result = currentCmdList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, 0, nullptr);
        driverHandle->releaseImportedPointer(dstPtr);
        break;
    }
    default:
        break;
    }

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
// Verifies whether the most recently recorded flush carried surface state heap data.
//
// nonKernel                       - true when the flushed operation was not a kernel
//                                   launch; heap data is then expected to be absent,
//                                   otherwise it is expected to be present.
// recordedImmediateFlushTaskFlags - flags inspected when useImmediateFlushTask == 1
//                                   (its sshCpuBase field is checked).
// recordedSsh                     - heap pointer inspected for any other mode.
void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::validateDispatchFlags(bool nonKernel, NEO::ImmediateDispatchFlags &recordedImmediateFlushTaskFlags, const NEO::IndirectHeap *recordedSsh) {
    const bool immediateFlushTaskUsed = (this->useImmediateFlushTask == 1);
    const bool heapInfoExpected = !nonKernel;

    if (immediateFlushTaskUsed) {
        if (heapInfoExpected) {
            EXPECT_NE(nullptr, recordedImmediateFlushTaskFlags.sshCpuBase);
            return;
        }
        EXPECT_EQ(nullptr, recordedImmediateFlushTaskFlags.sshCpuBase);
        return;
    }

    if (heapInfoExpected) {
        EXPECT_NE(nullptr, recordedSsh);
        return;
    }
    EXPECT_EQ(nullptr, recordedSsh);
}
bool AppendFillFixture::MockDriverFillHandle::findAllocationDataForRange(const void *buffer,
size_t size,
NEO::SvmAllocationData **allocData) {

View File

@ -169,6 +169,35 @@ struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixtu
std::unique_ptr<Event> event;
};
// Shared-heaps fixture extension that selects the flush-task mode at setUp() time
// and provides helpers for appending non-kernel commands and checking what heap
// data the recorded flush carried.
struct ImmediateCmdListSharedHeapsFlushTaskFixtureInit : public ImmediateCmdListSharedHeapsFixture {
    // Stores the mode and runs the base fixture setup; see the definition for details.
    void setUp(int32_t useImmediateFlushTask);

    // Non-kernel append operations that appendNonKernelOperation() can issue.
    enum NonKernelOperation {
        Barrier = 0,
        SignalEvent,
        ResetEvent,
        WaitOnEvents,
        WriteGlobalTimestamp,
        MemoryRangesBarrier
    };

    // Test scenario parametrized by the non-kernel operation to append.
    template <typename FamilyType>
    void testBody(NonKernelOperation operation);

    // Appends the selected non-kernel operation to the given command list.
    void appendNonKernelOperation(L0::ult::CommandList *currentCmdList, NonKernelOperation operation);
    // Checks presence/absence of heap data in the recorded flush arguments.
    void validateDispatchFlags(bool nonKernel, NEO::ImmediateDispatchFlags &recordedImmediateFlushTaskFlags, const NEO::IndirectHeap *recordedSsh);

    // Mode passed to setUp(); value-initialized so that a read before setUp()
    // does not observe an indeterminate value.
    int32_t useImmediateFlushTask = 0;
};
// Adapter giving the fixture a parameterless setUp(): the flush-task mode is
// supplied as the template argument and forwarded to the base setUp().
template <int32_t useImmediateFlushTaskT>
struct ImmediateCmdListSharedHeapsFlushTaskFixture : public ImmediateCmdListSharedHeapsFlushTaskFixtureInit {
    void setUp() {
        constexpr int32_t flushTaskMode = useImmediateFlushTaskT;
        ImmediateCmdListSharedHeapsFlushTaskFixtureInit::setUp(flushTaskMode);
    }
};
class AppendFillFixture : public DeviceFixture {
public:
class MockDriverFillHandle : public L0::DriverHandleImp {

View File

@ -7,6 +7,7 @@
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
@ -1212,5 +1213,61 @@ void TbxImmediateCommandListFixture::setUpT() {
setEvent();
}
// Scenario: two immediate command lists launch kernels, the CSR surface state heap
// is then fully consumed, both lists launch kernels again, and finally each list
// appends the given non-kernel operation.
// After every kernel launch the recorded flush is expected to carry heap data;
// after every non-kernel append it is expected to carry none.
template <typename FamilyType>
void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::testBody(NonKernelOperation operation) {
    auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();

    const ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    // Launches the fixture kernel on the given list and checks heap data was passed.
    auto launchKernelAndValidate = [&](auto &cmdList) {
        ze_result_t status = cmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
        EXPECT_EQ(ZE_RESULT_SUCCESS, status);
        validateDispatchFlags(false, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh);
    };

    // Appends the non-kernel operation and checks no heap data was passed.
    auto appendNonKernelAndValidate = [&](auto &cmdList) {
        appendNonKernelOperation(cmdList.get(), operation);
        validateDispatchFlags(true, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh);
    };

    launchKernelAndValidate(commandListImmediate);
    launchKernelAndValidate(commandListImmediateCoexisting);

    auto sshFirstCmdList = commandListImmediate->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;
    auto sshCoexistingCmdList = commandListImmediateCoexisting->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;

    void *firstSshCpuPointer = sshFirstCmdList->getCpuBase();
    EXPECT_EQ(sshFirstCmdList->getCpuBase(), sshCoexistingCmdList->getCpuBase());

    auto csrSshHeap = &ultCsr.getIndirectHeap(HeapType::SURFACE_STATE, 0);
    EXPECT_EQ(csrSshHeap->getCpuBase(), sshFirstCmdList->getCpuBase());

    // Consume all remaining space of the CSR surface state heap.
    csrSshHeap->getSpace(csrSshHeap->getAvailableSpace());

    launchKernelAndValidate(commandListImmediate);
    // The first list's reserved heap is expected to have a different CPU base now.
    EXPECT_NE(firstSshCpuPointer, sshFirstCmdList->getCpuBase());

    launchKernelAndValidate(commandListImmediateCoexisting);
    EXPECT_EQ(sshFirstCmdList->getCpuBase(), sshCoexistingCmdList->getCpuBase());
    EXPECT_EQ(csrSshHeap->getCpuBase(), sshFirstCmdList->getCpuBase());

    appendNonKernelAndValidate(commandListImmediate);
    appendNonKernelAndValidate(commandListImmediateCoexisting);
}
} // namespace ult
} // namespace L0

View File

@ -9,6 +9,7 @@
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
@ -19,7 +20,7 @@
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@ -313,9 +314,9 @@ HWTEST2_F(CommandListAppendLaunchKernelResetKernelCount, givenIsKernelSplitOpera
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
CmdListKernelLaunchParams launchParams = {};
{
event->zeroKernelCount();
@ -395,9 +396,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -427,9 +428,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -459,9 +460,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -491,9 +492,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -523,9 +524,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -555,9 +556,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -587,9 +588,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -620,9 +621,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -652,9 +653,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -695,9 +696,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
@ -756,9 +757,9 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true, false);
@ -798,9 +799,9 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false, false);
@ -1300,5 +1301,136 @@ HWTEST2_F(CommandListCreate, givenStateBaseAddressTrackingStateWhenCommandListCr
}
}
// Test fixture alias: shared-heaps flush-task fixture instantiated with template argument 0.
// NOTE(review): the tests using this alias are named "...UsingRegular...", so 0 presumably
// selects the regular flush-task path — confirm against the fixture definition.
using ImmediateCmdListSharedHeapsRegularFlushTaskTest = Test<ImmediateCmdListSharedHeapsFlushTaskFixture<0>>;
// Runs the fixture's shared testBody scenario with a barrier as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendBarrierProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::Barrier;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a signal-event as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendSignalEventProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::SignalEvent;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a reset-event as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendResetEventProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::ResetEvent;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a wait-on-events as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendWaitOnEventsProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::WaitOnEvents;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a global-timestamp write as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendWriteGlobalTimestampProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::WriteGlobalTimestamp;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a memory-ranges barrier as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest,
          givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendMemoryRangesBarrierProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::MemoryRangesBarrier;
    testBody<FamilyType>(appendedOperation);
}
// Test fixture alias: shared-heaps flush-task fixture instantiated with template argument 1.
// NOTE(review): the tests using this alias are named "...UsingImmediate...", so 1 presumably
// selects the immediate flush-task path — confirm against the fixture definition.
using ImmediateCmdListSharedHeapsImmediateFlushTaskTest = Test<ImmediateCmdListSharedHeapsFlushTaskFixture<1>>;
// Runs the fixture's shared testBody scenario with a barrier as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendBarrierProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::Barrier;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a signal-event as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendSignalEventProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::SignalEvent;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a reset-event as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendResetEventProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::ResetEvent;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a wait-on-events as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendWaitOnEventsProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::WaitOnEvents;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a global-timestamp write as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendWriteGlobalTimestampProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::WriteGlobalTimestamp;
    testBody<FamilyType>(appendedOperation);
}
// Runs the fixture's shared testBody scenario with a memory-ranges barrier as the non-kernel append.
// Per the test name, the expectation is that this append provides no heap info.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendMemoryRangesBarrierProvidesNoHeapInfo,
          IsAtLeastSkl) {
    const auto appendedOperation = NonKernelOperation::MemoryRangesBarrier;
    testBody<FamilyType>(appendedOperation);
}
// Checks the STATE_BASE_ADDRESS commands found in the CSR command stream for two
// consecutive appends on the immediate command list:
//  1. a barrier (non-kernel) append - the SBA must carry zeroed surface state and general
//     state base addresses with their modify-enable bits cleared;
//  2. a kernel launch - the SBA must carry the CSR surface state heap base and the
//     decanonized indirect object heap base, both with modify-enable set.
HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest,
          givenImmediateCommandListWhenFirstAppendIsNonKernelAppendAndSecondAppendIsKernelAppendThenExpectAllBaseAddressSbaCommandBeforeSecondAppend,
          IsAtLeastXeHpCore) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrCommandStream = csr.commandStream;

    // --- First append: barrier. Only the stream range written by this append is parsed. ---
    const size_t barrierStreamStart = csrCommandStream.getUsed();
    appendNonKernelOperation(commandListImmediate.get(), NonKernelOperation::Barrier);
    const size_t barrierStreamEnd = csrCommandStream.getUsed();

    GenCmdList barrierCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        barrierCommands,
        ptrOffset(csrCommandStream.getCpuBase(), barrierStreamStart),
        (barrierStreamEnd - barrierStreamStart)));

    auto barrierSbaCommands = findAll<STATE_BASE_ADDRESS *>(barrierCommands.begin(), barrierCommands.end());
    ASSERT_EQ(expectedSbaCmds, barrierSbaCommands.size());

    // NOTE(review): indexing element 0 assumes expectedSbaCmds is at least 1 - confirm in the fixture.
    auto barrierSba = genCmdCast<STATE_BASE_ADDRESS *>(*barrierSbaCommands[0]);

    EXPECT_EQ(0u, barrierSba->getSurfaceStateBaseAddress());
    EXPECT_FALSE(barrierSba->getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(0u, barrierSba->getGeneralStateBaseAddress());
    EXPECT_FALSE(barrierSba->getGeneralStateBaseAddressModifyEnable());

    // --- Second append: kernel launch. Again only the newly written stream range is parsed. ---
    const ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    const size_t kernelStreamStart = csrCommandStream.getUsed();
    const auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    const size_t kernelStreamEnd = csrCommandStream.getUsed();

    GenCmdList kernelCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        kernelCommands,
        ptrOffset(csrCommandStream.getCpuBase(), kernelStreamStart),
        (kernelStreamEnd - kernelStreamStart)));

    auto kernelSbaCommands = findAll<STATE_BASE_ADDRESS *>(kernelCommands.begin(), kernelCommands.end());
    ASSERT_EQ(expectedSbaCmds, kernelSbaCommands.size());

    auto csrSurfaceStateHeap = &csr.getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);

    // NOTE(review): the indirect object heap is read from `commandList`, while both appends
    // above go through `commandListImmediate` - confirm this is the intended container.
    auto &cmdContainer = commandList->getCmdContainer();
    auto indirectObjectHeap = cmdContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
    auto expectedGeneralStateBase = neoDevice->getGmmHelper()->decanonize(indirectObjectHeap->getHeapGpuBase());

    auto kernelSba = genCmdCast<STATE_BASE_ADDRESS *>(*kernelSbaCommands[0]);

    EXPECT_EQ(csrSurfaceStateHeap->getHeapGpuBase(), kernelSba->getSurfaceStateBaseAddress());
    EXPECT_TRUE(kernelSba->getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(expectedGeneralStateBase, kernelSba->getGeneralStateBaseAddress());
    EXPECT_TRUE(kernelSba->getGeneralStateBaseAddressModifyEnable());
}
} // namespace ult
} // namespace L0

View File

@ -196,6 +196,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
recordedDispatchFlags = dispatchFlags;
recordedSsh = ssh;
this->lastFlushedCommandStream = &commandStream;
return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device);
}
@ -451,6 +452,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
TaskCountType flushBcsTaskReturnValue{};
LinearStream *lastFlushedCommandStream = nullptr;
const IndirectHeap *recordedSsh = nullptr;
std::atomic<uint32_t> recursiveLockCounter;
std::atomic<uint32_t> waitForCompletionWithTimeoutTaskCountCalled{0};