feature: allow for dispatching work without event pool allocation

Related-To: NEO-11925

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-23 13:25:40 +00:00
committed by Compute-Runtime-Automation
parent 5a72d93c31
commit 65cc393638
11 changed files with 261 additions and 128 deletions

View File

@@ -1631,7 +1631,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
bool emitPipeControl = !isCopyOnlyEnabled && launchParams.pipeControlSignalling;
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl) {
if (!signalEvent && !isCopyOnlyEnabled) {
if ((!signalEvent || !signalEvent->getAllocation(this->device)) && !isCopyOnlyEnabled) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
@@ -2175,7 +2175,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
bool nonWalkerInOrderCmdChaining = false;
if (this->isInOrderExecutionEnabled()) {
if (launchParams.isKernelSplitOperation) {
if (!signalEvent) {
if (!signalEvent || !signalEvent->getAllocation(this->device)) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
@@ -3988,7 +3988,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleCounterBasedEventOperations(Eve
}
}
if (signalEvent->isUsingContextEndOffset() && NEO::debugManager.flags.StandaloneInOrderTimestampAllocationEnabled.get() == 1) {
if (signalEvent->isUsingContextEndOffset() && Event::standaloneInOrderTimestampAllocationEnabled()) {
signalEvent->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag());
}
}

View File

@@ -334,6 +334,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
if (!eventForInOrderExec->getAllocation(this->device) && Event::standaloneInOrderTimestampAllocationEnabled()) {
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag());
}
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false);
} else {
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();

View File

@@ -41,6 +41,10 @@ template Event *Event::create<uint32_t>(EventPool *, const ze_event_desc_t *, De
template Event *Event::create<uint64_t>(const EventDescriptor &, const ze_event_desc_t *, Device *);
template Event *Event::create<uint32_t>(const EventDescriptor &, const ze_event_desc_t *, Device *);
// Reports whether the StandaloneInOrderTimestampAllocationEnabled debug flag
// is set to exactly 1; any other flag value yields false.
bool Event::standaloneInOrderTimestampAllocationEnabled() {
    const auto flagValue = NEO::debugManager.flags.StandaloneInOrderTimestampAllocationEnabled.get();
    return flagValue == 1;
}
ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
this->context = static_cast<ContextImp *>(context);
@@ -410,6 +414,14 @@ uint64_t Event::getGpuAddress(Device *device) const {
return getAllocation(device)->getGpuAddress() + this->eventPoolOffset;
}
// Returns the host-visible base address of this event's storage.
// When an in-order timestamp node is attached, its CPU base is returned;
// otherwise the address derived from the event pool is used.
void *Event::getHostAddress() const {
    if (!inOrderTimestampNode) {
        return this->hostAddressFromPool;
    }
    return inOrderTimestampNode->getCpuBase();
}
NEO::GraphicsAllocation *Event::getAllocation(Device *device) const {
auto rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex();

View File

@@ -112,6 +112,8 @@ struct Event : _ze_event_handle_t {
implicitlyDisabled
};
static bool standaloneInOrderTimestampAllocationEnabled();
template <typename TagSizeT>
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
@@ -133,7 +135,7 @@ struct Event : _ze_event_handle_t {
virtual uint64_t getPacketAddress(Device *device) = 0;
MOCKABLE_VIRTUAL void resetPackets(bool resetAllPackets);
virtual void resetKernelCountAndPacketUsedCount() = 0;
void *getHostAddress() const { return hostAddress; }
void *getHostAddress() const;
virtual void setPacketsInUse(uint32_t value) = 0;
uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; }
MOCKABLE_VIRTUAL void setGpuStartTimestamp();
@@ -333,7 +335,7 @@ struct Event : _ze_event_handle_t {
MetricCollectorEventNotify *metricNotification = nullptr;
NEO::MultiGraphicsAllocation *eventPoolAllocation = nullptr;
StackVec<NEO::CommandStreamReceiver *, 1> csrs;
void *hostAddress = nullptr;
void *hostAddressFromPool = nullptr;
Device *device = nullptr;
std::weak_ptr<Kernel> kernelWithPrintf = std::weak_ptr<Kernel>{};
std::mutex *kernelWithPrintfDeviceMutex = nullptr;

View File

@@ -51,7 +51,7 @@ Event *Event::create(const EventDescriptor &eventDescriptor, const ze_event_desc
event->totalEventSize = eventDescriptor.totalEventSize;
event->eventPoolOffset = desc->index * event->totalEventSize;
event->hostAddress = ptrOffset(baseHostAddress, event->eventPoolOffset);
event->hostAddressFromPool = ptrOffset(baseHostAddress, event->eventPoolOffset);
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csrs.push_back(csr);
@@ -71,12 +71,6 @@ Event *Event::create(const EventDescriptor &eventDescriptor, const ze_event_desc
}
event->setUsingContextEndOffset(useContextEndOffset);
// do not reset even if it has been imported, since event pool
// might have been imported after events being already signaled
if (event->isFromIpcPool == false) {
event->resetDeviceCompletionData(true);
}
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
@@ -94,6 +88,12 @@ Event *Event::create(const EventDescriptor &eventDescriptor, const ze_event_desc
event->enableCounterBasedMode(true, eventDescriptor.counterBasedFlags);
}
// do not reset the event if its pool has been imported, since the event pool
// might have been imported after its events were already signaled
if (event->isFromIpcPool == false) {
event->resetDeviceCompletionData(true);
}
auto extendedDesc = reinterpret_cast<const ze_base_desc_t *>(desc->pNext);
bool interruptMode = false;
@@ -140,6 +140,10 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
eventPool->isIpcPoolFlagSet(), // ipcPool
};
if (eventPool->getCounterBasedFlags() != 0 && standaloneInOrderTimestampAllocationEnabled()) {
eventDescriptor.eventPoolAllocation = nullptr;
}
Event *event = Event::create<TagSizeT>(eventDescriptor, desc, device);
UNRECOVERABLE_IF(event == nullptr);
event->setEventPool(eventPool);
@@ -313,7 +317,7 @@ void EventImp<TagSizeT>::handleSuccessfulHostSynchronization() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusEventPackets() {
assignKernelEventCompletionData(this->hostAddress);
assignKernelEventCompletionData(getHostAddress());
uint32_t queryVal = Event::STATE_CLEARED;
uint32_t packets = 0;
for (uint32_t i = 0; i < this->kernelCount; i++) {
@@ -334,7 +338,7 @@ ze_result_t EventImp<TagSizeT>::queryStatusEventPackets() {
if (this->signalAllEventPackets) {
if (packets < getMaxPacketsCount()) {
uint32_t remainingPackets = getMaxPacketsCount() - packets;
auto remainingPacketSyncAddress = ptrOffset(this->hostAddress, packets * this->singlePacketSize);
auto remainingPacketSyncAddress = ptrOffset(getHostAddress(), packets * this->singlePacketSize);
remainingPacketSyncAddress = ptrOffset(remainingPacketSyncAddress, this->getCompletionFieldOffset());
for (uint32_t i = 0; i < remainingPackets; i++) {
void const *queryAddress = remainingPacketSyncAddress;
@@ -403,25 +407,24 @@ void EventImp<TagSizeT>::tbxDownload(NEO::Device &device, bool &downloadedAlloca
template <typename TagSizeT>
bool EventImp<TagSizeT>::handlePreQueryStatusOperationsAndCheckCompletion() {
if (this->eventPoolAllocation) {
if (metricNotification != nullptr) {
hostEventSetValue(metricNotification->getNotificationState());
if (metricNotification != nullptr && eventPoolAllocation) {
hostEventSetValue(metricNotification->getNotificationState());
}
if (this->tbxMode) {
bool downloadedAllocation = (eventPoolAllocation == nullptr);
bool downloadedInOrdedAllocation = (inOrderExecInfo.get() == nullptr);
if (inOrderExecInfo && inOrderExecInfo->isExternalMemoryExecInfo()) {
downloadedInOrdedAllocation = true;
DEBUG_BREAK_IF(true); // external allocation - not able to download
}
if (this->tbxMode) {
bool downloadedAllocation = (eventPoolAllocation == nullptr);
bool downloadedInOrdedAllocation = (inOrderExecInfo.get() == nullptr);
tbxDownload(*this->device->getNEODevice(), downloadedAllocation, downloadedInOrdedAllocation);
tbxDownload(*this->device->getNEODevice(), downloadedAllocation, downloadedInOrdedAllocation);
if (!downloadedAllocation || !downloadedInOrdedAllocation) {
for (auto &subDevice : this->device->getNEODevice()->getRootDevice()->getSubDevices()) {
tbxDownload(*subDevice, downloadedAllocation, downloadedInOrdedAllocation);
}
if (!downloadedAllocation || !downloadedInOrdedAllocation) {
for (auto &subDevice : this->device->getNEODevice()->getRootDevice()->getSubDevices()) {
tbxDownload(*subDevice, downloadedAllocation, downloadedInOrdedAllocation);
}
}
} else {
DEBUG_BREAK_IF(this->tbxMode); // external allocation - not able to download
}
if (!this->isFromIpcPool && isAlreadyCompleted()) {
@@ -446,8 +449,11 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
if (isCounterBased() && !getAllocation(this->device)) {
return ZE_RESULT_SUCCESS;
}
auto baseHostAddr = this->hostAddress;
auto baseHostAddr = getHostAddress();
auto baseGpuAddr = getGpuAddress(device);
uint64_t timestampStart = static_cast<uint64_t>(eventVal);
@@ -465,7 +471,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
for (uint32_t i = 0; i < this->kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++, packets++) {
if (castToUint64(baseHostAddr) >= castToUint64(ptrOffset(this->hostAddress, totalEventSize))) {
if (castToUint64(baseHostAddr) >= castToUint64(ptrOffset(getHostAddress(), totalEventSize))) {
break;
}
copyDataToEventAlloc(ptrOffset(baseHostAddr, contextStartOffset), baseGpuAddr + contextStartOffset, sizeof(TagSizeT), timestampStart);
@@ -530,7 +536,7 @@ void EventImp<TagSizeT>::copyDataToEventAlloc(void *dstHostAddr, uint64_t dstGpu
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
if (!hostAddress) {
if (!hostAddressFromPool && !this->inOrderTimestampNode) {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
@@ -538,6 +544,10 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
return hostEventSetValueTimestamps(eventVal);
}
if (isCounterBased()) {
return ZE_RESULT_SUCCESS;
}
auto basePacketHostAddr = getCompletionFieldHostAddress();
auto basePacketGpuAddr = getCompletionFieldGpuAddress(device);
@@ -553,7 +563,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++, packets++) {
if (castToUint64(packetHostAddr) >= castToUint64(ptrOffset(this->hostAddress, totalEventSize))) {
if (castToUint64(packetHostAddr) >= castToUint64(ptrOffset(getHostAddress(), totalEventSize))) {
break;
}
@@ -736,7 +746,7 @@ void EventImp<TagSizeT>::synchronizeCounterBasedTimestampCompletionWithTimeout()
uint64_t timeDiff = 0;
do {
assignKernelEventCompletionData(hostAddress);
assignKernelEventCompletionData(getHostAddress());
calculateProfilingData();
timeDiff = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - startTime).count();
@@ -751,7 +761,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
return ZE_RESULT_NOT_READY;
}
assignKernelEventCompletionData(hostAddress);
assignKernelEventCompletionData(getHostAddress());
calculateProfilingData();
if (isCounterBased() && contextEndTS == Event::STATE_CLEARED) {

View File

@@ -433,7 +433,7 @@ void TbxImmediateCommandListFixture::setEvent() {
auto mockEvent = static_cast<Event *>(event.get());
size_t offset = event->getCompletionFieldOffset();
void *completionAddress = ptrOffset(mockEvent->hostAddress, offset);
void *completionAddress = ptrOffset(mockEvent->hostAddressFromPool, offset);
size_t packets = event->getPacketsInUse();
EventFieldType signaledValue = Event::STATE_SIGNALED;
for (size_t i = 0; i < packets; i++) {

View File

@@ -37,16 +37,24 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
using EventImp<uint32_t>::latestUsedCmdQueue;
using EventImp<uint32_t>::inOrderTimestampNode;
void makeCounterBasedInitiallyDisabled() {
void makeCounterBasedInitiallyDisabled(MultiGraphicsAllocation &poolAllocation) {
resetInOrderTimestampNode(nullptr);
counterBasedMode = CounterBasedMode::initiallyDisabled;
resetCompletionStatus();
counterBasedFlags = 0;
this->eventPoolAllocation = &poolAllocation;
this->hostAddressFromPool = ptrOffset(eventPoolAllocation->getGraphicsAllocation(0)->getUnderlyingBuffer(), eventPoolOffset);
reset();
}
void makeCounterBasedImplicitlyDisabled() {
void makeCounterBasedImplicitlyDisabled(MultiGraphicsAllocation &poolAllocation) {
resetInOrderTimestampNode(nullptr);
counterBasedMode = CounterBasedMode::implicitlyDisabled;
resetCompletionStatus();
counterBasedFlags = 0;
this->eventPoolAllocation = &poolAllocation;
this->hostAddressFromPool = ptrOffset(eventPoolAllocation->getGraphicsAllocation(0)->getUnderlyingBuffer(), eventPoolOffset);
reset();
}
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,7 @@ GEN12LPTEST_F(TimestampEvent, givenEventTimestampsWhenQueryKernelTimestampThenCo
data.globalStart = 3u;
data.globalEnd = 4u;
event->hostAddress = &data;
event->hostAddressFromPool = &data;
ze_kernel_timestamp_result_t result = {};
event->queryKernelTimestamp(&result);
@@ -52,7 +52,7 @@ GEN12LPTEST_F(TimestampUsedPacketSignalEvent, givenEventMoreThanOneTimestampsPac
data[2].globalStart = 6u;
data[2].globalEnd = 7u;
event->hostAddress = &data;
event->hostAddressFromPool = &data;
event->setPacketsInUse(3u);
ze_kernel_timestamp_result_t result = {};

View File

@@ -24,7 +24,7 @@ struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass::Event;
using BaseClass::eventPoolAllocation;
using BaseClass::gpuHangCheckPeriod;
using BaseClass::hostAddress;
using BaseClass::hostAddressFromPool;
using BaseClass::isFromIpcPool;
using BaseClass::l3FlushAppliedOnKernel;
using BaseClass::maxKernelCount;
@@ -41,7 +41,7 @@ struct WhiteBox<::L0::EventImp<TagSizeT>> : public L0::EventImp<TagSizeT> {
using BaseClass = ::L0::EventImp<TagSizeT>;
using BaseClass::csrs;
using BaseClass::gpuHangCheckPeriod;
using BaseClass::hostAddress;
using BaseClass::hostAddressFromPool;
using BaseClass::hostEventSetValueTimestamps;
using BaseClass::isFromIpcPool;
using BaseClass::l3FlushAppliedOnKernel;

View File

@@ -396,7 +396,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallW
EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount);
// non in-order event
events[1]->makeCounterBasedInitiallyDisabled();
events[1]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
events[1]->hostSynchronize(2);
EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount);
}
@@ -406,21 +406,27 @@ HWTEST2_F(InOrderCmdListTests, givenRegularCmdListWhenAppendQueryKernelTimestamp
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
auto eventPool = createEvents<FamilyType>(2, true);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto deviceMem = allocDeviceMem(128);
ze_event_handle_t queryEvents[2] = {events[0]->toHandle(), events[1]->toHandle()};
regularCmdList->appendQueryKernelTimestamps(2, queryEvents, deviceMem, nullptr, nullptr, 0, nullptr);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);
bool chainingRequired = regularCmdList->latestOperationRequiredNonWalkerInOrderCmdsChaining;
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
auto offset = cmdStream->getUsed();
regularCmdList->appendQueryKernelTimestamps(2, queryEvents, deviceMem, nullptr, nullptr, 0, nullptr);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset));
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, semaphores.size());
ASSERT_EQ(chainingRequired ? 1u : 2u, semaphores.size());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphores[0]);
@@ -439,7 +445,7 @@ HWTEST2_F(InOrderCmdListTests, givenCounterBasedTimestampEventWhenQueryingTimest
this->eventPoolAllocation = &pool->getAllocation();
this->totalEventSize = 128;
hostAddress = eventPoolAllocation->getGraphicsAllocation(0)->getUnderlyingBuffer();
hostAddressFromPool = eventPoolAllocation->getGraphicsAllocation(0)->getUnderlyingBuffer();
this->csrs[0] = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
this->maxKernelCount = 1;
@@ -745,7 +751,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithTemporaryInOrderDataAssignme
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
@@ -779,7 +785,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenSetInOrde
immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_FALSE(events[0]->isCounterBased());
@@ -816,7 +822,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnR
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
@@ -923,7 +929,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingStoreDataImmThenP
auto eventPool = createEvents<FamilyType>(2, false);
auto eventHandle = events[0]->toHandle();
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
EXPECT_FALSE(events[1]->isKmdWaitModeEnabled());
EXPECT_FALSE(events[1]->isInterruptModeEnabled());
@@ -1051,7 +1057,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForRegularEventFromPre
auto immCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto eventHandle = events[0]->toHandle();
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -1171,9 +1177,9 @@ HWTEST2_F(InOrderCmdListTests, givenImplicitEventConvertionEnabledWhenUsingImmed
outOfOrderImmCmdList->inOrderExecInfo.reset();
auto eventPool = createEvents<FamilyType>(3, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[1]->makeCounterBasedInitiallyDisabled();
events[2]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
events[1]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
events[2]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1260,7 +1266,7 @@ HWTEST2_F(InOrderCmdListTests, givenImplicitEventConvertionEnabledWhenUsingAppen
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
events[0]->enableCounterBasedMode(false, eventPool->getCounterBasedFlags());
immCmdList->appendEventReset(events[0]->toHandle());
@@ -1271,7 +1277,7 @@ HWTEST2_F(InOrderCmdListTests, givenImplicitEventConvertionEnabledWhenUsingAppen
HWTEST2_F(InOrderCmdListTests, givenImplicitEventConvertionEnabledWhenCallingAppendThenHandleInOrderExecInfo, MatchAny) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
events[0]->enableCounterBasedMode(false, eventPool->getCounterBasedFlags());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@@ -1303,7 +1309,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingWhenDispatchingKernelThenProgram
}
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -1416,7 +1422,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
AlignedAllocationData allocationData = {mockAllocation.gpuAddress, 0, &mockAllocation, false};
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false);
if (dcFlushRequired) {
@@ -1428,7 +1434,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
CmdListKernelLaunchParams cooperativeParams = {};
cooperativeParams.isCooperative = true;
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, cooperativeParams, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1436,7 +1442,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernelIndirect(kernel->toHandle(), *static_cast<ze_group_count_t *>(alloc), eventHandle, 0, nullptr, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1446,7 +1452,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
size_t rangeSizes = 1;
const void **ranges = reinterpret_cast<const void **>(&copyData[0]);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, eventHandle, 0, nullptr);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1454,15 +1460,18 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
copyOnlyCmdList->appendMemoryCopyBlitRegion(&allocationData, &allocationData, region, region, {0, 0, 0}, 0, 0, 0, 0, {0, 0, 0}, {0, 0, 0}, events[0].get(), 0, nullptr, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
} else {
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
if (events[0]->inOrderTimestampNode) {
copyOnlyCmdList->inOrderExecInfo->pushTempTimestampNode(events[0]->inOrderTimestampNode, events[0]->inOrderExecSignalValue);
}
events[0]->inOrderTimestampNode = nullptr;
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, false, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1470,7 +1479,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, eventHandle, 0, nullptr, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1478,7 +1487,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1486,7 +1495,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendSignalEvent(eventHandle);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1494,7 +1503,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(copyData), eventHandle, 0, nullptr);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1502,7 +1511,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendBarrier(eventHandle, 0, nullptr, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1512,7 +1521,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
zex_wait_on_mem_desc_t desc;
desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL;
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), copyData, 1, eventHandle, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1530,7 +1539,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv
}
immCmdList->copyThroughLockedPtrEnabled = true;
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendMemoryCopy(alloc, &copyData, 1, eventHandle, 0, nullptr, false, false);
if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
@@ -1637,7 +1646,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer
bool heaplessEnabled = immCmdList->isHeaplessModeEnabled();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -1687,7 +1696,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyAndFlushRequiredWh
auto immCmdList = createImmCmdList<gfxCoreFamily>();
bool heaplessEnabled = immCmdList->isHeaplessModeEnabled();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventHandle = events[0]->toHandle();
@@ -1838,7 +1847,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingWhenDispatchingKernelWithRelaxed
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -2074,7 +2083,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, false);
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope()));
@@ -2117,11 +2125,13 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
ASSERT_NE(cmdList.end(), walkerItor);
WalkerVariant walkerVariant = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(*walkerItor);
std::visit([&cmdList, &immCmdList, &walkerItor, isCompactEvent, eventEndGpuVa, counterOffset](auto &&walker) {
std::visit([&](auto &&walker) {
auto &postSync = walker->getPostSync();
using PostSyncType = std::decay_t<decltype(postSync)>;
if (isCompactEvent) {
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
EXPECT_EQ(PostSyncType::OPERATION::OPERATION_NO_WRITE, postSync.getOperation());
auto pcItor = find<PIPE_CONTROL *>(walkerItor, cmdList.end());
@@ -2419,7 +2429,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenCl
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->signalScope = 0;
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
if (immCmdList->inOrderExecInfo->isAtomicDeviceSignalling()) {
GTEST_SKIP();
@@ -2575,7 +2585,7 @@ HWTEST2_F(InOrderCmdListTests, givenNonPostSyncWalkerWhenAskingForNonWalkerSigna
auto eventPool1 = createEvents<FamilyType>(1, true);
auto eventPool2 = createEvents<FamilyType>(1, false);
auto eventPool3 = createEvents<FamilyType>(1, false);
events[2]->makeCounterBasedInitiallyDisabled();
events[2]->makeCounterBasedInitiallyDisabled(eventPool3->getAllocation());
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get()));
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[1].get()));
@@ -2591,7 +2601,7 @@ HWTEST2_F(InOrderCmdListTests, givenMultipleAllocationsForWriteWhenAskingForNonW
auto eventPool0 = createEvents<FamilyType>(1, true);
auto eventPool1 = createEvents<FamilyType>(1, false);
auto eventPool2 = createEvents<FamilyType>(1, false);
events[2]->makeCounterBasedInitiallyDisabled();
events[2]->makeCounterBasedInitiallyDisabled(eventPool2->getAllocation());
bool isCompactEvent0 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope()));
bool isCompactEvent1 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[1]->isSignalScope()));
@@ -2765,7 +2775,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingNonKernelAppendThe
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t inOrderSyncVa = inOrderExecInfo->getBaseDeviceAddress();
@@ -2867,7 +2877,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
uint8_t ptr[64] = {};
@@ -3314,13 +3324,50 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOu
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
ASSERT_NE(nullptr, pcCmd);
while (PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE == pcCmd->getPostSyncOperation()) {
pcItor = find<PIPE_CONTROL *>(++pcItor, cmdList.end());
ASSERT_NE(cmdList.end(), pcItor);
auto sdiItor = find<MI_STORE_DATA_IMM *>(pcItor, cmdList.end());
ASSERT_NE(cmdList.end(), sdiItor);
pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
ASSERT_NE(nullptr, pcCmd);
}
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t syncVa = inOrderExecInfo->isHostStorageDuplicated() ? reinterpret_cast<uint64_t>(inOrderExecInfo->getBaseHostAddress()) : inOrderExecInfo->getBaseDeviceAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
context->freeMem(data);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOutProfilingEventThenSignalInOrderAllocation, IsAtLeastXeHpCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
constexpr size_t size = 128 * sizeof(uint32_t);
auto data = allocHostMem(size);
immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));
auto walkerItor = NEO::UnitTestHelper<FamilyType>::findWalkerTypeCmd(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), walkerItor);
auto pcItor = find<PIPE_CONTROL *>(walkerItor, cmdList.end());
ASSERT_NE(cmdList.end(), pcItor);
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
ASSERT_NE(nullptr, pcCmd);
auto sdiItor = find<MI_STORE_DATA_IMM *>(pcItor, cmdList.end());
ASSERT_NE(cmdList.end(), sdiItor);
@@ -3516,7 +3563,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto eventHandle = events[0]->toHandle();
@@ -3929,7 +3976,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
auto offset = cmdStream->getUsed();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto eventHandle = events[0]->toHandle();
@@ -4295,7 +4342,6 @@ HWTEST2_F(InOrderCmdListTests, givenProfilingEventWhenDoingCpuCopyThenSetProfili
EXPECT_NE(nullptr, events[0]->inOrderExecInfo.get());
EXPECT_TRUE(events[0]->isAlreadyCompleted());
EXPECT_EQ(L0::Event::STATE_CLEARED, *static_cast<uint32_t *>(events[0]->getHostAddress()));
immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, eventHandle1, 0, nullptr, false, false);
@@ -4317,7 +4363,7 @@ HWTEST2_F(InOrderCmdListTests, givenEventCreatedFromPoolWhenItIsQueriedForAddres
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zexEventGetDeviceAddress(eventHandle, nullptr, &address));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zexEventGetDeviceAddress(nullptr, &counterValue, &address));
events[0]->makeCounterBasedImplicitlyDisabled();
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
EXPECT_EQ(ZE_RESULT_SUCCESS, zexEventGetDeviceAddress(eventHandle, &counterValue, &address));
EXPECT_EQ(Event::State::STATE_SIGNALED, counterValue);
EXPECT_EQ(address, events[0]->getCompletionFieldGpuAddress(events[0]->peekEventPool()->getDevice()));
@@ -4709,14 +4755,50 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
ASSERT_NE(nullptr, pcCmd);
while (PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE == pcCmd->getPostSyncOperation()) {
cmdItor = find<PIPE_CONTROL *>(++cmdItor, cmdList.end());
ASSERT_NE(cmdList.end(), cmdItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++cmdItor));
pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
ASSERT_NE(nullptr, pcCmd);
while (sdiCmd == nullptr && cmdItor != cmdList.end()) {
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++cmdItor));
}
ASSERT_NE(nullptr, sdiCmd);
auto inOrderExecInfo = immCmdList->inOrderExecInfo;
uint64_t syncVa = inOrderExecInfo->isHostStorageDuplicated() ? reinterpret_cast<uint64_t>(inOrderExecInfo->getBaseHostAddress()) : inOrderExecInfo->getBaseDeviceAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
alignedFree(alignedPtr);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithProfilingEventThenSignalCounter, IsAtLeastXeHpCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
auto eventHandle = events[0]->toHandle();
const size_t ptrBaseSize = 128;
const size_t offset = 1;
auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
auto unalignedPtr = ptrOffset(alignedPtr, offset);
immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, eventHandle, 0, nullptr, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));
auto cmdItor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), cmdItor);
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*cmdItor);
ASSERT_NE(nullptr, pcCmd);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++cmdItor));
while (sdiCmd == nullptr && cmdItor != cmdList.end()) {
@@ -5690,7 +5772,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo
constexpr size_t copySize = 8 * MemoryConstants::megaByte;
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto eventHandle = events[0]->toHandle();
immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);
@@ -5752,7 +5834,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithR
uint32_t copyData[64] = {};
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, eventHandle, 0, nullptr, false, false);
if (immCmdList->getDcFlushRequired(true)) {
@@ -6264,7 +6346,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto eventPool = createEvents<FamilyType>(1, true);
auto eventHandle = events[0]->toHandle();
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
auto regularCopyOnlyCmdList = createRegularCmdList<gfxCoreFamily>(true);
@@ -6554,13 +6636,29 @@ HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenTimestampEventWhenAski
EXPECT_NE(events[0]->inOrderTimestampNode->getBaseGraphicsAllocation(), events[0]->eventPoolAllocation);
EXPECT_NE(nullptr, events[0]->inOrderTimestampNode->getBaseGraphicsAllocation());
EXPECT_NE(nullptr, events[0]->eventPoolAllocation);
EXPECT_EQ(nullptr, events[0]->eventPoolAllocation);
EXPECT_EQ(events[0]->inOrderTimestampNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), events[0]->getAllocation(device));
EXPECT_EQ(events[0]->inOrderTimestampNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0)->getGpuAddress(), events[0]->getGpuAddress(device));
EXPECT_EQ(events[0]->getGpuAddress(device) + events[0]->getCompletionFieldOffset(), events[0]->getCompletionFieldGpuAddress(device));
}
// Checks the allocation state of an event after it is used as the signal event of a kernel launch
// on an immediate command list. The event is expected to report an allocation only when the command
// list classifies it as a compact L3-flush event, and that same condition is expected to match the
// result of isInOrderNonWalkerSignalingRequired() for the event.
HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenNonWalkerCounterSignalingWhenPassedNonProfilingEventThenAssignAllocation, IsAtLeastXeHpCore) {
// Creates a single event; going by the test name, the `false` argument presumably selects a
// non-profiling event (TODO confirm against the createEvents helper in the fixture).
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
auto cmdList = createImmCmdList<gfxCoreFamily>();
// Precondition: the event has no in-order timestamp node before anything is dispatched.
EXPECT_EQ(nullptr, events[0]->inOrderTimestampNode);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false);
// The expected outcome is derived from the command list's own compact-event decision instead of
// being hard-coded, so both assertions below follow whatever that query returns.
bool isCompactEvent = cmdList->compactL3FlushEvent(cmdList->getDcFlushRequired(events[0]->isSignalScope()));
// An allocation must be present exactly when the event is a compact event.
EXPECT_EQ(isCompactEvent, events[0]->getAllocation(device) != nullptr);
// Non-walker in-order signaling must be reported as required under the same condition.
EXPECT_EQ(isCompactEvent, cmdList->isInOrderNonWalkerSignalingRequired(events[0].get()));
}
HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenTimestampEventWhenDispatchingThenAssignNewNode, MatchAny) {
auto eventPool = createEvents<FamilyType>(1, true);
auto eventHandle = events[0]->toHandle();
@@ -7001,7 +7099,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenLimitedSyncDispatchWhenAppend
immCmdList->synchronizedDispatchMode = NEO::SynchronizedDispatchMode::limited;
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedInitiallyDisabled();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
size_t offset = cmdStream->getUsed();

View File

@@ -2121,14 +2121,14 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim
packetData[1].globalStart = 7u;
packetData[1].globalEnd = 8u;
eventRoot->hostAddress = packetData;
eventRoot->hostAddressFromPool = packetData;
ze_kernel_timestamp_result_t results[2];
uint32_t numPackets = 2;
for (uint32_t packetId = 0; packetId < numPackets; packetId++) {
eventRoot->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventRoot->hostAddress);
eventRoot->hostAddress = ptrOffset(eventRoot->hostAddress, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
eventRoot->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventRoot->hostAddressFromPool);
eventRoot->hostAddressFromPool = ptrOffset(eventRoot->hostAddressFromPool, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
}
uint32_t pCount = 0;
@@ -2150,11 +2150,11 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim
numPackets = 1;
eventSub0->setPacketsInUse(1u);
eventSub0->hostAddress = packetData;
eventSub0->hostAddressFromPool = packetData;
for (uint32_t packetId = 0; packetId < numPackets; packetId++) {
eventSub0->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventSub0->hostAddress);
eventSub0->hostAddress = ptrOffset(eventSub0->hostAddress, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
eventSub0->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventSub0->hostAddressFromPool);
eventSub0->hostAddressFromPool = ptrOffset(eventSub0->hostAddressFromPool, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
}
pCount = 0;
@@ -2177,11 +2177,11 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim
numPackets = 1;
eventSub1->setPacketsInUse(1u);
eventSub1->hostAddress = packetData;
eventSub1->hostAddressFromPool = packetData;
for (uint32_t packetId = 0; packetId < numPackets; packetId++) {
eventSub1->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventSub1->hostAddress);
eventSub1->hostAddress = ptrOffset(eventSub1->hostAddress, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
eventSub1->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, eventSub1->hostAddressFromPool);
eventSub1->hostAddressFromPool = ptrOffset(eventSub1->hostAddressFromPool, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
}
pCount = 0;
@@ -2288,7 +2288,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
packetData[2].globalStart = timeToTimeStamp(5000u);
packetData[2].globalEnd = timeToTimeStamp(500u);
event->hostAddress = packetData;
event->hostAddressFromPool = packetData;
uint32_t count = 0;
EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, nullptr));
@@ -2452,7 +2452,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, TimestampEventCreate, givenEventTimestampsWhenQue
data.globalStart = 3u;
data.globalEnd = 4u;
event->hostAddress = &data;
event->hostAddressFromPool = &data;
ze_kernel_timestamp_result_t result = {};
event->queryKernelTimestamp(&result);
@@ -2476,14 +2476,14 @@ TEST_F(TimestampEventUsedPacketSignalCreate, givenEventWhenQueryingTimestampExpT
packetData[1].globalStart = 7u;
packetData[1].globalEnd = 8u;
event->hostAddress = packetData;
event->hostAddressFromPool = packetData;
ze_kernel_timestamp_result_t results[2];
uint32_t pCount = 2;
for (uint32_t packetId = 0; packetId < pCount; packetId++) {
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddressFromPool);
event->hostAddressFromPool = ptrOffset(event->hostAddressFromPool, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
}
auto result = event->queryTimestampsExp(device, &pCount, results);
@@ -2500,7 +2500,7 @@ TEST_F(TimestampEventUsedPacketSignalCreate, givenEventWhenQueryingTimestampExpT
HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnFirstKernelThenDoNotUseSecondPacketOfFirstKernel, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4];
event->hostAddress = packetData;
event->hostAddressFromPool = packetData;
constexpr uint32_t kernelStartValue = 5u;
constexpr uint32_t kernelEndValue = 10u;
@@ -2544,7 +2544,7 @@ HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWh
HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnSecondKernelThenDoNotUseSecondPacketOfSecondKernel, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4];
event->hostAddress = packetData;
event->hostAddressFromPool = packetData;
constexpr uint32_t kernelStartValue = 5u;
constexpr uint32_t kernelEndValue = 10u;
@@ -2587,7 +2587,7 @@ HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWh
HWTEST2_F(TimestampEventCreateMultiKernel, givenOverflowingTimeStampDataOnTwoKernelsWhenQueryKernelTimestampIsCalledOverflowIsObserved, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4] = {};
event->hostAddress = packetData;
event->hostAddressFromPool = packetData;
uint32_t maxTimeStampValue = std::numeric_limits<uint32_t>::max();
@@ -2960,7 +2960,7 @@ TEST_F(EventTests, givenTwoEventsCreatedThenTheyHaveDifferentAddresses) {
auto event1 = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc1, device));
ASSERT_NE(event1, nullptr);
EXPECT_NE(event0->hostAddress, event1->hostAddress);
EXPECT_NE(event0->hostAddressFromPool, event1->hostAddressFromPool);
EXPECT_NE(event0->getGpuAddress(device), event1->getGpuAddress(device));
event0->destroy();
@@ -3590,7 +3590,7 @@ HWTEST_F(EventTests, GivenEventIsReadyToDownloadAllAlocationsWhenDownloadAllocat
auto event = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device));
size_t offset = event->getCompletionFieldOffset();
void *completionAddress = ptrOffset(event->hostAddress, offset);
void *completionAddress = ptrOffset(event->hostAddressFromPool, offset);
size_t packets = event->getPacketsInUse();
uint64_t signaledValue = Event::STATE_SIGNALED;
for (size_t i = 0; i < packets; i++) {
@@ -3620,7 +3620,7 @@ HWTEST_F(EventTests, GivenNotReadyEventBecomesReadyWhenDownloadAllocationRequire
EXPECT_EQ(0u, ultCsr.downloadAllocationsCalledCount);
size_t offset = event->getCompletionFieldOffset();
void *completionAddress = ptrOffset(event->hostAddress, offset);
void *completionAddress = ptrOffset(event->hostAddressFromPool, offset);
size_t packets = event->getPacketsInUse();
uint64_t signaledValue = Event::STATE_SIGNALED;
for (size_t i = 0; i < packets; i++) {
@@ -3710,7 +3710,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
EXPECT_FALSE(eventAllocation->isTbxWritable(expectedBanks));
size_t offset = event->getCompletionFieldOffset();
void *completionAddress = ptrOffset(event->hostAddress, offset);
void *completionAddress = ptrOffset(event->hostAddressFromPool, offset);
size_t packets = event->getPacketsInUse();
uint64_t signaledValue = Event::STATE_SIGNALED;
for (size_t i = 0; i < packets; i++) {
@@ -3730,7 +3730,7 @@ struct MockEventCompletion : public L0::EventImp<TagSizeT> {
using BaseClass = L0::EventImp<TagSizeT>;
using BaseClass::gpuEndTimestamp;
using BaseClass::gpuStartTimestamp;
using BaseClass::hostAddress;
using BaseClass::hostAddressFromPool;
MockEventCompletion(MultiGraphicsAllocation *alloc, uint32_t eventSize, uint32_t maxKernelCount, uint32_t maxPacketsCount, int index, L0::Device *device) : BaseClass::EventImp(index, device, false) {
auto neoDevice = device->getNEODevice();
@@ -3742,7 +3742,7 @@ struct MockEventCompletion : public L0::EventImp<TagSizeT> {
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex())->getUnderlyingBuffer());
this->totalEventSize = eventSize;
this->eventPoolOffset = index * this->totalEventSize;
hostAddress = reinterpret_cast<void *>(baseHostAddr + this->eventPoolOffset);
hostAddressFromPool = reinterpret_cast<void *>(baseHostAddr + this->eventPoolOffset);
this->csrs[0] = neoDevice->getDefaultEngine().commandStreamReceiver;
this->maxKernelCount = maxKernelCount;
@@ -3796,7 +3796,7 @@ TEST_F(EventTests, givenDebugFlagSetWhenCallingResetThenSynchronizeBeforeReset)
auto event = std::make_unique<MockEventCompletion<uint32_t>>(&eventPool->getAllocation(), eventPool->getEventSize(), eventPool->getMaxKernelCount(), eventPool->getEventMaxPackets(), 1u, device);
event->failOnNextQueryStatus = true;
*reinterpret_cast<uint32_t *>(event->hostAddress) = Event::STATE_SIGNALED;
*reinterpret_cast<uint32_t *>(event->hostAddressFromPool) = Event::STATE_SIGNALED;
testing::internal::CaptureStdout();
@@ -3815,7 +3815,7 @@ TEST_F(EventTests, givenDebugFlagSetWhenCallingResetThenPrintLogAndSynchronizeBe
debugManager.flags.SynchronizeEventBeforeReset.set(2);
auto event = std::make_unique<MockEventCompletion<uint32_t>>(&eventPool->getAllocation(), eventPool->getEventSize(), eventPool->getMaxKernelCount(), eventPool->getEventMaxPackets(), 1u, device);
*reinterpret_cast<uint32_t *>(event->hostAddress) = Event::STATE_SIGNALED;
*reinterpret_cast<uint32_t *>(event->hostAddressFromPool) = Event::STATE_SIGNALED;
{
event->failOnNextQueryStatus = false;