fix: append paths for standalone CB Event

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2024-02-16 11:03:25 +00:00
committed by Compute-Runtime-Automation
parent 0120d8a58d
commit 8dc3364d56
10 changed files with 201 additions and 30 deletions

View File

@@ -548,7 +548,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
event->resetPackets(false);
event->disableHostCaching(!isImmediateType());
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
// default state of event is single packet, handle case when reset is used 1st, launchkernel 2nd - just reset all packets then, use max
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
@@ -2156,14 +2156,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, bool skipBarrierForEndProfiling) {
if (event == nullptr) {
if (event == nullptr || !event->getPoolAllocation(this->device)) {
return;
}
if (event->isEventTimestampFlagSet()) {
appendEventForProfiling(event, false, skipBarrierForEndProfiling);
} else {
event->resetKernelCountAndPacketUsedCount();
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
event->setPacketsInUse(this->partitionCount);
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, !isCopyOnly(), false);
@@ -2175,7 +2175,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
if (!event->isEventTimestampFlagSet()) {
return;
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
@@ -2362,7 +2362,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
if (NEO::debugManager.flags.EnableSWTags.get()) {
@@ -2506,7 +2506,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
continue;
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
appendWaitOnSingleEvent(event, relaxedOrderingAllowed);
}
@@ -2633,7 +2633,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
return;
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
bool workloadPartition = isTimestampEventForMultiTile(event);
appendDispatchOffsetRegister(workloadPartition, true);
@@ -2749,7 +2749,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
for (uint32_t i = 0u; i < numEvents; ++i) {
auto event = Event::fromHandle(phEvents[i]);
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
timestampsData[i].address = event->getGpuAddress(this->device);
timestampsData[i].packetsInUse = event->getPacketsInUse();
timestampsData[i].timestampSizeInDw = event->getTimestampSizeInDw();

View File

@@ -198,13 +198,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
compactEvent = event;
event = nullptr;
} else {
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
NEO::GraphicsAllocation *eventPoolAlloc = event->getPoolAllocation(this->device);
if (eventPoolAlloc) {
commandContainer.addToResidencyContainer(eventPoolAlloc);
eventAddress = event->getPacketAddress(this->device);
isTimestampEvent = event->isUsingContextEndOffset();
}
bool flushRequired = event->isSignalScope() &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
interruptEvent = event->isInterruptModeEnabled();
}
}

View File

@@ -406,11 +406,11 @@ void Event::disableImplicitCounterBasedMode() {
}
uint64_t Event::getGpuAddress(Device *device) const {
return getAllocation(device).getGpuAddress() + this->eventPoolOffset;
return getPoolAllocation(device)->getGpuAddress() + this->eventPoolOffset;
}
NEO::GraphicsAllocation &Event::getAllocation(Device *device) const {
return *this->eventPoolAllocation->getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
NEO::GraphicsAllocation *Event::getPoolAllocation(Device *device) const {
return this->eventPoolAllocation ? this->eventPoolAllocation->getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()) : nullptr;
}
void Event::setGpuStartTimestamp() {

View File

@@ -117,7 +117,7 @@ struct Event : _ze_event_handle_t {
inline ze_event_handle_t toHandle() { return this; }
MOCKABLE_VIRTUAL NEO::GraphicsAllocation &getAllocation(Device *device) const;
MOCKABLE_VIRTUAL NEO::GraphicsAllocation *getPoolAllocation(Device *device) const;
void setEventPool(EventPool *eventPool) { this->eventPool = eventPool; }

View File

@@ -319,7 +319,7 @@ bool EventImp<TagSizeT>::handlePreQueryStatusOperationsAndCheckCompletion() {
for (auto const &engine : allEngines) {
const auto &csr = engine.commandStreamReceiver;
if (!downloadedAllocation) {
if (auto &alloc = this->getAllocation(this->device); alloc.isUsedByOsContext(csr->getOsContext().getContextId())) {
if (auto &alloc = *this->getPoolAllocation(this->device); alloc.isUsedByOsContext(csr->getOsContext().getContextId())) {
csr->downloadAllocation(alloc);
downloadedAllocation = true;
}
@@ -367,7 +367,7 @@ template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
auto baseHostAddr = this->hostAddress;
auto baseGpuAddr = getAllocation(device).getGpuAddress();
auto baseGpuAddr = getPoolAllocation(device)->getGpuAddress();
uint64_t timestampStart = static_cast<uint64_t>(eventVal);
uint64_t timestampEnd = static_cast<uint64_t>(eventVal);
@@ -415,15 +415,20 @@ void EventImp<TagSizeT>::copyDataToEventAlloc(void *dstHostAddr, uint64_t dstGpu
memcpy_s(dstHostAddr, copySize, &copyData, copySize);
if (this->tbxMode) {
auto &alloc = getAllocation(device);
auto alloc = getPoolAllocation(device);
if (!alloc) {
DEBUG_BREAK_IF(true);
return;
}
constexpr uint32_t allBanks = std::numeric_limits<uint32_t>::max();
alloc.setTbxWritable(true, allBanks);
alloc->setTbxWritable(true, allBanks);
auto offset = ptrDiff(dstGpuVa, alloc.getGpuAddress());
auto offset = ptrDiff(dstGpuVa, alloc->getGpuAddress());
csrs[0]->writeMemory(alloc, true, offset, copySize);
csrs[0]->writeMemory(*alloc, true, offset, copySize);
alloc.setTbxWritable(true, allBanks);
alloc->setTbxWritable(true, allBanks);
}
}

View File

@@ -129,8 +129,8 @@ class MockEvent : public ::L0::Event {
this->maxKernelCount = EventPacketsCount::maxKernelSplit;
this->maxPacketCount = EventPacketsCount::eventPackets;
}
NEO::GraphicsAllocation &getAllocation(L0::Device *device) const override {
return *mockAllocation.get();
NEO::GraphicsAllocation *getPoolAllocation(L0::Device *device) const override {
return mockAllocation.get();
}
uint64_t getGpuAddress(L0::Device *device) const override {

View File

@@ -938,7 +938,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
{
auto itorEvent = std::find(std::begin(commandList->getCmdContainer().getResidencyContainer()),
std::end(commandList->getCmdContainer().getResidencyContainer()),
&event->getAllocation(device));
event->getPoolAllocation(device));
EXPECT_NE(itorEvent, std::end(commandList->getCmdContainer().getResidencyContainer()));
}
}

View File

@@ -125,7 +125,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppend
{
auto itorEvent = std::find(std::begin(commandList->getCmdContainer().getResidencyContainer()),
std::end(commandList->getCmdContainer().getResidencyContainer()),
&event->getAllocation(device));
event->getPoolAllocation(device));
EXPECT_NE(itorEvent, std::end(commandList->getCmdContainer().getResidencyContainer()));
}
}

View File

@@ -3609,6 +3609,87 @@ HWTEST2_F(InOrderCmdListTests, givenStandaloneEventWhenCallingSynchronizeThenRet
zeEventDestroy(handle);
}
// Standalone counter-based events (created with zexCounterBasedEventCreate from
// caller-provided device/host counter addresses) are used as signal and wait
// events on an immediate in-order command list. Every append must succeed.
HWTEST2_F(InOrderCmdListTests, givenStandaloneEventWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;
    ze_event_handle_t eHandle3 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle3));

    constexpr size_t size = 128 * sizeof(uint32_t);
    auto data = allocHostMem(size);

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of a fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false));
    // Standalone event as wait event of a fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false));
    // Standalone event as signal event of a kernel launch.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eHandle3, 0, nullptr, launchParams, false));

    context->freeMem(data);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
    zeEventDestroy(eHandle3);
}
// Standalone counter-based events are used as signal and wait events for memory
// copies on pointers offset by one byte from a cache-line-aligned allocation
// (the test name refers to this as the kernel-split case). Every append must succeed.
HWTEST2_F(InOrderCmdListTests, givenStandaloneEventAndKernelSplitWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));

    const size_t ptrBaseSize = 128;
    const size_t offset = 1;
    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
    // One byte past the aligned base, so source and destination are deliberately misaligned.
    auto unalignedPtr = ptrOffset(alignedPtr, offset);

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of the copy.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, eHandle1, 0, nullptr, false, false));
    // Standalone event as wait event of the copy.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 1, &eHandle2, false, false));

    alignedFree(alignedPtr);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
}
// Standalone counter-based events are used as signal and wait events for memory
// fills on a copy-only immediate command list. Every append must succeed.
HWTEST2_F(InOrderCmdListTests, givenStandaloneEventAndCopyOnlyCmdListWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));

    constexpr size_t size = 128 * sizeof(uint32_t);
    auto data = allocHostMem(size);

    auto immCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of the fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false));
    // Standalone event as wait event of the fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false));

    context->freeMem(data);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
}
HWTEST2_F(InOrderCmdListTests, givenCounterBasedEventWhenAskingForEventAddressAndValueThenReturnCorrectValues, IsAtLeastSkl) {
auto eventPool = createEvents<FamilyType>(1, false);
uint64_t counterValue = -1;
@@ -3813,6 +3894,87 @@ struct MultiTileInOrderCmdListTests : public InOrderCmdListTests {
const uint32_t partitionCount = 2;
};
// Multi-tile variant: standalone counter-based events are used as signal and wait
// events on a multi-tile immediate in-order command list. Every append must succeed.
HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;
    ze_event_handle_t eHandle3 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle3));

    constexpr size_t size = 128 * sizeof(uint32_t);
    auto data = allocHostMem(size);

    auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of a fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false));
    // Standalone event as wait event of a fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false));
    // Standalone event as signal event of a kernel launch.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eHandle3, 0, nullptr, launchParams, false));

    context->freeMem(data);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
    zeEventDestroy(eHandle3);
}
// Multi-tile variant: standalone counter-based events are used as signal and wait
// events for memory copies on pointers offset by one byte from a cache-line-aligned
// allocation (the test name refers to this as the kernel-split case).
// Every append must succeed.
HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventAndKernelSplitWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));

    const size_t ptrBaseSize = 128;
    const size_t offset = 1;
    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
    // One byte past the aligned base, so source and destination are deliberately misaligned.
    auto unalignedPtr = ptrOffset(alignedPtr, offset);

    auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of the copy.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, eHandle1, 0, nullptr, false, false));
    // Standalone event as wait event of the copy.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 1, &eHandle2, false, false));

    alignedFree(alignedPtr);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
}
// Multi-tile fixture variant: standalone counter-based events are used as signal
// and wait events for memory fills on a copy-only immediate command list.
// Every append must succeed.
HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventAndCopyOnlyCmdListWhenCallingAppendThenSuccess, IsAtLeastXeHpCore) {
    uint64_t counterValue = 2;
    uint64_t *hostAddress = &counterValue;
    // NOTE(review): this address is 64 bytes past a stack variable; presumably it is
    // only programmed into commands and never dereferenced by the mock CSR - confirm.
    uint64_t *gpuAddress = ptrOffset(&counterValue, 64);

    ze_event_desc_t eventDesc = {};
    ze_event_handle_t eHandle1 = nullptr;
    ze_event_handle_t eHandle2 = nullptr;

    // ASSERT instead of EXPECT: the handles are used unconditionally below, so a
    // failed creation must stop the test rather than continue with a null handle.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle1));
    ASSERT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate(context, device, gpuAddress, hostAddress, counterValue + 1, &eventDesc, &eHandle2));

    constexpr size_t size = 128 * sizeof(uint32_t);
    auto data = allocHostMem(size);

    // NOTE(review): this is the same copy-only factory the single-tile test uses;
    // confirm the multi-tile fixture setup is what makes this a multi-tile scenario.
    auto immCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();

    // Standalone event as signal event of the fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false));
    // Standalone event as wait event of the fill.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false));

    context->freeMem(data);
    zeEventDestroy(eHandle1);
    zeEventDestroy(eHandle2);
}
HWTEST2_F(MultiTileInOrderCmdListTests, givenDebugFlagSetWhenAskingForAtomicSignallingThenReturnTrue, IsAtLeastXeHpCore) {
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();

View File

@@ -3257,7 +3257,7 @@ HWTEST_F(EventTests, GivenEventWhenHostSynchronizeCalledThenExpectDownloadEventA
downloadAllocationTrack[&gfxAllocation]++;
};
auto eventAllocation = &event->getAllocation(device);
auto eventAllocation = event->getPoolAllocation(device);
constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
auto result = event->hostSynchronize(timeout);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -3314,7 +3314,7 @@ HWTEST_F(EventTests, GivenEventUsedOnNonDefaultCsrWhenHostSynchronizeCalledThenA
downloadAllocationTrack[&gfxAllocation]++;
};
auto eventAllocation = &event->getAllocation(device);
auto eventAllocation = event->getPoolAllocation(device);
constexpr uint64_t timeout = 0;
auto result = event->hostSynchronize(timeout);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -3546,7 +3546,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
EXPECT_EQ(0u, ultCsr.writeMemoryParams.callCount);
auto event = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device));
auto eventAllocation = &event->getAllocation(device);
auto eventAllocation = event->getPoolAllocation(device);
EXPECT_TRUE(eventAllocation->getAubInfo().writeMemoryOnly);