From b674a2829d0f0a55d8349cb0ad4cf4fc898ab901 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Fri, 25 Aug 2023 16:16:51 +0000 Subject: [PATCH] fix: pass heap information when kernel operation is flushed Related-To: NEO-8281 Signed-off-by: Zbigniew Zdanowicz --- .../source/cmdlist/cmdlist_hw_immediate.inl | 215 +++++++++--------- .../unit_tests/fixtures/cmdlist_fixture.cpp | 58 +++++ .../unit_tests/fixtures/cmdlist_fixture.h | 29 +++ .../unit_tests/fixtures/cmdlist_fixture.inl | 57 +++++ .../sources/cmdlist/test_cmdlist_4.cpp | 130 ++++++++--- .../libult/ult_command_stream_receiver.h | 2 + 6 files changed, 360 insertions(+), 131 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 4550bf9f46..2d15700121 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -113,81 +113,87 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushBcsTask template NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) { - bool sbaDirty = this->csr->getGSBAStateDirty(); + void *sshCpuPointer = nullptr; - NEO::IndirectHeap *dsh = nullptr; - NEO::IndirectHeap *ssh = nullptr; + if (kernelOperation) { + bool sbaDirty = this->csr->getGSBAStateDirty(); - NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT); - this->csr->makeResident(*ioh->getGraphicsAllocation()); - if (sbaDirty) { - this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages()); - } + NEO::IndirectHeap *dsh = nullptr; + NEO::IndirectHeap *ssh = nullptr; - if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { - ssh = this->csr->getGlobalStatelessHeap(); - this->csr->makeResident(*ssh->getGraphicsAllocation()); + NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT); + this->csr->makeResident(*ioh->getGraphicsAllocation()); if (sbaDirty) { - this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); + this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages()); } - } else if (this->immediateCmdListHeapSharing) { - ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation; - if (ssh->getGraphicsAllocation()) { - this->csr->makeResident(*ssh->getGraphicsAllocation()); - this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), - ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); - } - if (this->dynamicHeapRequired) { - dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation; - if (dsh->getGraphicsAllocation()) { + if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { + ssh = this->csr->getGlobalStatelessHeap(); + this->csr->makeResident(*ssh->getGraphicsAllocation()); + if (sbaDirty) { + this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); + } + } else if (this->immediateCmdListHeapSharing) { + ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation; + if (ssh->getGraphicsAllocation()) { + this->csr->makeResident(*ssh->getGraphicsAllocation()); + + this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), + ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); + } + if (this->dynamicHeapRequired) { + dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation; + if (dsh->getGraphicsAllocation()) { + this->csr->makeResident(*dsh->getGraphicsAllocation()); + this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages()); + } + } + } else { + if (this->dynamicHeapRequired) { + dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE); this->csr->makeResident(*dsh->getGraphicsAllocation()); this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages()); } + ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE); + this->csr->makeResident(*ssh->getGraphicsAllocation()); + this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), + ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); } - } else { - if (this->dynamicHeapRequired) { - dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE); - this->csr->makeResident(*dsh->getGraphicsAllocation()); - this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages()); + + sshCpuPointer = ssh->getCpuBase(); + + if (this->device->getL0Debugger()) { + this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); + this->csr->makeResident(*this->device->getDebugSurface()); } - ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE); - this->csr->makeResident(*ssh->getGraphicsAllocation()); - this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), - ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); - } - void *sshCpuPointer = ssh->getCpuBase(); + NEO::Device *neoDevice = this->device->getNEODevice(); + if (neoDevice->getDebugger()) { + auto csrHw = static_cast *>(this->csr); + auto &sshState = csrHw->getSshState(); + bool sshDirty = sshState.updateAndCheck(ssh); - if (this->device->getL0Debugger()) { - this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); - this->csr->makeResident(*this->device->getDebugSurface()); - } + if (sshDirty) { + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; - NEO::Device *neoDevice = this->device->getNEODevice(); - if (neoDevice->getDebugger()) { - auto csrHw = static_cast *>(this->csr); - auto &sshState = csrHw->getSshState(); - bool sshDirty = sshState.updateAndCheck(ssh); - - if (sshDirty) { - auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); - auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; - - NEO::EncodeSurfaceStateArgs args; - args.outMemory = &surfaceState; - args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); - args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); - args.mocs = this->device->getMOCS(false, false); - args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); - args.allocation = this->device->getDebugSurface(); - args.gmmHelper = neoDevice->getGmmHelper(); - args.useGlobalAtomics = false; - args.areMultipleSubDevicesInContext = false; - args.isDebuggerActive = true; - NEO::EncodeSurfaceState::encodeBuffer(args); - *reinterpret_cast(surfaceStateSpace) = surfaceState; + NEO::EncodeSurfaceStateArgs args; + args.outMemory = &surfaceState; + args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); + args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); + args.mocs = this->device->getMOCS(false, false); + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.allocation = this->device->getDebugSurface(); + args.gmmHelper = neoDevice->getGmmHelper(); + args.useGlobalAtomics = false; + args.areMultipleSubDevicesInContext = false; + args.isDebuggerActive = true; + NEO::EncodeSurfaceState::encodeBuffer(args); + *reinterpret_cast(surfaceStateSpace) = surfaceState; + } } + + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); } NEO::ImmediateDispatchFlags dispatchFlags{ @@ -197,7 +203,6 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmedia hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies hasStallingCmds // hasStallingCmds }; - this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); CommandListImp::storeReferenceTsToMappedEvents(true); return this->csr->flushImmediateTask( @@ -245,58 +250,60 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushRegular false // isDcFlushRequiredOnStallingCommandsOnNextFlush ); - this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags); - this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); - auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT)); NEO::IndirectHeap *dsh = nullptr; NEO::IndirectHeap *ssh = nullptr; - if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { - ssh = this->csr->getGlobalStatelessHeap(); - } else if (this->immediateCmdListHeapSharing) { - auto &sshReserveConfig = this->commandContainer.getSurfaceStateHeapReserve(); - if (sshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) { - ssh = sshReserveConfig.indirectHeapReservation; + if (kernelOperation) { + this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags); + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); + + if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { + ssh = this->csr->getGlobalStatelessHeap(); + } else if (this->immediateCmdListHeapSharing) { + auto &sshReserveConfig = this->commandContainer.getSurfaceStateHeapReserve(); + if (sshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) { + ssh = sshReserveConfig.indirectHeapReservation; + } + auto &dshReserveConfig = this->commandContainer.getDynamicStateHeapReserve(); + if (this->dynamicHeapRequired && dshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) { + dsh = dshReserveConfig.indirectHeapReservation; + } + } else { + dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE); + ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE); } - auto &dshReserveConfig = this->commandContainer.getDynamicStateHeapReserve(); - if (this->dynamicHeapRequired && dshReserveConfig.indirectHeapReservation->getGraphicsAllocation()) { - dsh = dshReserveConfig.indirectHeapReservation; + + if (this->device->getL0Debugger()) { + UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage)); + this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); + this->csr->makeResident(*this->device->getDebugSurface()); } - } else { - dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE); - ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE); - } - if (this->device->getL0Debugger()) { - UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage)); - this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); - this->csr->makeResident(*this->device->getDebugSurface()); - } + NEO::Device *neoDevice = this->device->getNEODevice(); + if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) { + auto csrHw = static_cast *>(this->csr); + auto sshStateCopy = csrHw->getSshState(); + bool sshDirty = sshStateCopy.updateAndCheck(ssh); - NEO::Device *neoDevice = this->device->getNEODevice(); - if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) { - auto csrHw = static_cast *>(this->csr); - auto sshStateCopy = csrHw->getSshState(); - bool sshDirty = sshStateCopy.updateAndCheck(ssh); + if (sshDirty) { + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; - if (sshDirty) { - auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); - auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; - - NEO::EncodeSurfaceStateArgs args; - args.outMemory = &surfaceState; - args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); - args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); - args.mocs = this->device->getMOCS(false, false); - args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); - args.allocation = this->device->getDebugSurface(); - args.gmmHelper = neoDevice->getGmmHelper(); - args.useGlobalAtomics = false; - args.areMultipleSubDevicesInContext = false; - args.isDebuggerActive = true; - NEO::EncodeSurfaceState::encodeBuffer(args); - *reinterpret_cast(surfaceStateSpace) = surfaceState; + NEO::EncodeSurfaceStateArgs args; + args.outMemory = &surfaceState; + args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); + args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); + args.mocs = this->device->getMOCS(false, false); + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.allocation = this->device->getDebugSurface(); + args.gmmHelper = neoDevice->getGmmHelper(); + args.useGlobalAtomics = false; + args.areMultipleSubDevicesInContext = false; + args.isDebuggerActive = true; + NEO::EncodeSurfaceState::encodeBuffer(args); + *reinterpret_cast(surfaceStateSpace) = surfaceState; + } } } diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index c06183dcde..62065d2e23 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -296,6 +296,64 @@ void ImmediateCmdListSharedHeapsFixture::tearDown() { ModuleMutableCommandListFixture::tearDown(); } +void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::setUp(int32_t useImmediateFlushTask) { + this->useImmediateFlushTask = useImmediateFlushTask; + DebugManager.flags.UseImmediateFlushTask.set(useImmediateFlushTask); + + ImmediateCmdListSharedHeapsFixture::setUp(); +} + +void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::appendNonKernelOperation(L0::ult::CommandList *currentCmdList, NonKernelOperation operation) { + ze_result_t result; + + if (operation == NonKernelOperation::Barrier) { + result = currentCmdList->appendBarrier(nullptr, 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } else if (operation == NonKernelOperation::SignalEvent) { + result = currentCmdList->appendSignalEvent(event->toHandle()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } else if (operation == NonKernelOperation::ResetEvent) { + result = currentCmdList->appendEventReset(event->toHandle()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } else if (operation == NonKernelOperation::WaitOnEvents) { + auto eventHandle = event->toHandle(); + result = currentCmdList->appendWaitOnEvents(1, &eventHandle, false, false, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } else if (operation == NonKernelOperation::WriteGlobalTimestamp) { + uint64_t timestampAddress = 0xfffffffffff0L; + uint64_t *dstptr = reinterpret_cast(timestampAddress); + + result = currentCmdList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } else if (operation == NonKernelOperation::MemoryRangesBarrier) { + uint8_t dstPtr[64] = {}; + driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize); + + size_t rangeSizes = 1; + const void **ranges = reinterpret_cast(&dstPtr[0]); + result = currentCmdList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + driverHandle->releaseImportedPointer(dstPtr); + } +} + +void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::validateDispatchFlags(bool nonKernel, NEO::ImmediateDispatchFlags &recordedImmediateFlushTaskFlags, const NEO::IndirectHeap *recordedSsh) { + if (this->useImmediateFlushTask == 1) { + if (nonKernel) { + EXPECT_EQ(nullptr, recordedImmediateFlushTaskFlags.sshCpuBase); + } else { + EXPECT_NE(nullptr, recordedImmediateFlushTaskFlags.sshCpuBase); + } + } else { + if (nonKernel) { + EXPECT_EQ(nullptr, recordedSsh); + } else { + EXPECT_NE(nullptr, recordedSsh); + } + } +} + bool AppendFillFixture::MockDriverFillHandle::findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) { diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 22a13afbfe..bbab42c74a 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -169,6 +169,35 @@ struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixtu std::unique_ptr event; }; +struct ImmediateCmdListSharedHeapsFlushTaskFixtureInit : public ImmediateCmdListSharedHeapsFixture { + void setUp(int32_t useImmediateFlushTask); + + enum NonKernelOperation { + Barrier = 0, + SignalEvent, + ResetEvent, + WaitOnEvents, + WriteGlobalTimestamp, + MemoryRangesBarrier + }; + + template + void testBody(NonKernelOperation operation); + + void appendNonKernelOperation(L0::ult::CommandList *currentCmdList, NonKernelOperation operation); + + void validateDispatchFlags(bool nonKernel, NEO::ImmediateDispatchFlags &recordedImmediateFlushTaskFlags, const NEO::IndirectHeap *recordedSsh); + + int32_t useImmediateFlushTask; +}; + +template +struct ImmediateCmdListSharedHeapsFlushTaskFixture : public ImmediateCmdListSharedHeapsFlushTaskFixtureInit { + void setUp() { + ImmediateCmdListSharedHeapsFlushTaskFixtureInit::setUp(useImmediateFlushTaskT); + } +}; + class AppendFillFixture : public DeviceFixture { public: class MockDriverFillHandle : public L0::DriverHandleImp { diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index aa4df6638c..df4e64cca8 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -7,6 +7,7 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/thread_arbitration_policy.h" +#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/grf_config.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" @@ -1212,5 +1213,61 @@ void TbxImmediateCommandListFixture::setUpT() { setEvent(); } +template +void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::testBody(NonKernelOperation operation) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + const ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = ZE_RESULT_SUCCESS; + + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + validateDispatchFlags(false, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); + + result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + validateDispatchFlags(false, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); + + auto &cmdContainer = commandListImmediate->commandContainer; + auto &cmdContainerCoexisting = commandListImmediateCoexisting->commandContainer; + + auto sshFirstCmdList = cmdContainer.getSurfaceStateHeapReserve().indirectHeapReservation; + auto sshCoexistingCmdList = cmdContainerCoexisting.getSurfaceStateHeapReserve().indirectHeapReservation; + + void *firstSshCpuPointer = sshFirstCmdList->getCpuBase(); + + EXPECT_EQ(sshFirstCmdList->getCpuBase(), sshCoexistingCmdList->getCpuBase()); + + auto csrSshHeap = &ultCsr.getIndirectHeap(HeapType::SURFACE_STATE, 0); + + EXPECT_EQ(csrSshHeap->getCpuBase(), sshFirstCmdList->getCpuBase()); + + csrSshHeap->getSpace(csrSshHeap->getAvailableSpace()); + + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + validateDispatchFlags(false, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); + + EXPECT_NE(firstSshCpuPointer, sshFirstCmdList->getCpuBase()); + + result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + validateDispatchFlags(false, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); + + EXPECT_EQ(sshFirstCmdList->getCpuBase(), sshCoexistingCmdList->getCpuBase()); + EXPECT_EQ(csrSshHeap->getCpuBase(), sshFirstCmdList->getCpuBase()); + + appendNonKernelOperation(commandListImmediate.get(), operation); + validateDispatchFlags(true, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); + + appendNonKernelOperation(commandListImmediateCoexisting.get(), operation); + validateDispatchFlags(true, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 311b0e30bf..6741ebea23 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -19,7 +19,7 @@ #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/image/image_hw.h" -#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -313,9 +313,9 @@ HWTEST2_F(CommandListAppendLaunchKernelResetKernelCount, givenIsKernelSplitOpera eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); CmdListKernelLaunchParams launchParams = {}; { event->zeroKernelCount(); @@ -395,9 +395,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -427,9 +427,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -459,9 +459,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -491,9 +491,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -523,9 +523,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -555,9 +555,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -587,9 +587,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -620,9 +620,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -652,9 +652,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -695,9 +695,9 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -756,9 +756,9 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); auto eventHandle = event->toHandle(); result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true, false); @@ -798,9 +798,9 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); auto eventHandle = event->toHandle(); result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false, false); @@ -1300,5 +1300,81 @@ HWTEST2_F(CommandListCreate, givenStateBaseAddressTrackingStateWhenCommandListCr } } +using ImmediateCmdListSharedHeapsRegularFlushTaskTest = Test>; + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendBarrierProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::Barrier); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendSignalEventProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::SignalEvent); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendResetEventProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::ResetEvent); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendWaitOnEventsProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::WaitOnEvents); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendWriteGlobalTimestampProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::WriteGlobalTimestamp); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsRegularFlushTaskTest, + givenMultipleCommandListsUsingRegularWhenOldSharedHeapIsDepletedThenNonKernelAppendMemoryRangesBarrierProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::MemoryRangesBarrier); +} + +using ImmediateCmdListSharedHeapsImmediateFlushTaskTest = Test>; + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendBarrierProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::Barrier); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendSignalEventProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::SignalEvent); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendResetEventProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::ResetEvent); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendWaitOnEventsProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::WaitOnEvents); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendWriteGlobalTimestampProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::WriteGlobalTimestamp); +} + +HWTEST2_F(ImmediateCmdListSharedHeapsImmediateFlushTaskTest, + givenMultipleCommandListsUsingImmediateWhenOldSharedHeapIsDepletedThenNonKernelAppendMemoryRangesBarrierProvidesNoHeapInfo, + IsAtLeastSkl) { + testBody(NonKernelOperation::MemoryRangesBarrier); +} + } // namespace ult } // namespace L0 diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index e0fbb760f6..e2934bfb39 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -196,6 +196,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { recordedDispatchFlags = dispatchFlags; + recordedSsh = ssh; this->lastFlushedCommandStream = &commandStream; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } @@ -451,6 +452,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ TaskCountType flushBcsTaskReturnValue{}; LinearStream *lastFlushedCommandStream = nullptr; + const IndirectHeap *recordedSsh = nullptr; std::atomic recursiveLockCounter; std::atomic waitForCompletionWithTimeoutTaskCountCalled{0};