feature: cross CmdList and Event in-order data sharing [1/n]

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-09-27 14:02:30 +00:00
committed by Compute-Runtime-Automation
parent 7d356795f0
commit c5f5f60f8d
21 changed files with 202 additions and 206 deletions

View File

@@ -46,6 +46,7 @@
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/helpers/in_order_cmd_helpers.h"
#include "level_zero/core/source/image/image.h"
#include "level_zero/core/source/kernel/kernel.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
@@ -141,8 +142,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
inOrderDependencyCounter = 0;
inOrderAllocationOffset = 0;
if (inOrderDependencyCounterAllocation) {
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
if (inOrderExecInfo) {
auto &inOrderDependencyCounterAllocation = inOrderExecInfo->inOrderDependencyCounterAllocation;
memset(inOrderDependencyCounterAllocation.getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
}
return ZE_RESULT_SUCCESS;
@@ -151,7 +153,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter() {
if (!isQwordInOrderCounter() && ((inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max())) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true);
inOrderDependencyCounter = 0;
@@ -160,14 +162,14 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter() {
inOrderAllocationOffset += offset;
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(); // write 1 on new offset
}
inOrderDependencyCounter++;
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -395,7 +397,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
event, launchParams);
addToMappedEventList(event);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
return ret;
@@ -497,7 +499,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0));
}
@@ -517,7 +519,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
}
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -556,7 +558,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
appendSignalEventPostWalker(signalEvent, false);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -1316,7 +1318,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) {
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -1458,7 +1460,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) {
if (!signalEvent && !isCopyOnly()) {
NEO::PipeControlArgs args;
@@ -1497,7 +1499,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
bool forceDisableCopyOnlyInOrderSignaling) {
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -1557,7 +1559,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
addToMappedEventList(signalEvent);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
if (inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
}
@@ -1986,7 +1988,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
if (launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
@@ -2225,7 +2227,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true);
}
}
@@ -2252,7 +2254,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0));
}
@@ -2275,7 +2277,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -2328,12 +2330,12 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderEventWaitRequired(const Event &event) const {
return (event.getInOrderExecDataAllocation() != this->inOrderDependencyCounterAllocation);
return (event.getInOrderExecDataAllocation() != &inOrderExecInfo->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
signalInOrderCompletion &= this->inOrderExecutionEnabled;
signalInOrderCompletion &= this->isInOrderExecutionEnabled();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -2422,7 +2424,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
uint64_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
uint64_t gpuVa = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset;
auto miStoreCmd = reinterpret_cast<MI_STORE_DATA_IMM *>(commandContainer.getCommandStream()->getSpace(sizeof(MI_STORE_DATA_IMM)));
@@ -2568,7 +2570,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendSignalEventPostWalker(signalEvent, false);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -3055,7 +3057,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendEventForProfiling(signalEvent, true, false);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
} else if (isCopyOnly()) {
NEO::MiFlushArgs args{this->dummyBlitWa};
@@ -3075,7 +3077,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
}
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent, this->inOrderExecutionEnabled);
appendSignalEventPostWalker(signalEvent, this->isInOrderExecutionEnabled());
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
@@ -3204,7 +3206,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendEventForProfiling(signalEvent, true, false);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(false);
}
@@ -3223,7 +3225,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent, false);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -3245,7 +3247,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr);
commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(false);
}
@@ -3271,7 +3273,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
args);
}
if (this->inOrderExecutionEnabled) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
@@ -3460,8 +3462,8 @@ void CommandListCoreFamily<gfxCoreFamily>::addCmdForPatching(void *cmd, uint64_t
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::patchInOrderCmds() {
if (this->regularCmdListSubmissionCounter > 0) {
auto appendCounter = this->regularCmdListSubmissionCounter * inOrderDependencyCounter;
if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 0) {
auto appendCounter = inOrderExecInfo->regularCmdListSubmissionCounter * inOrderDependencyCounter;
for (auto &cmd : inOrderPatchCmds) {
cmd.patch(appendCounter);

View File

@@ -183,7 +183,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
protected:
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::inOrderExecInfo;
void printKernelsPrintfOutput(bool hangDetected);
MOCKABLE_VIRTUAL ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;

View File

@@ -26,6 +26,7 @@
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/device/bcs_split.h"
#include "level_zero/core/source/helpers/error_code_helper_l0.h"
#include "level_zero/core/source/helpers/in_order_cmd_helpers.h"
#include "encode_surface_state_args.h"
@@ -508,7 +509,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
if (isInOrderExecutionEnabled()) {
if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) {
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
}
return ZE_RESULT_SUCCESS;
@@ -923,7 +924,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
this->latestFlushIsHostVisible = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
if (isInOrderExecutionEnabled() && signalEvent->isInOrderExecEvent()) {
signalEvent->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
signalEvent->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
}
} else {
this->latestFlushIsHostVisible = false;
@@ -1266,11 +1267,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
waitStartTime = lastHangCheckTime;
do {
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
this->csr->downloadAllocation(inOrderExecInfo->inOrderDependencyCounterAllocation);
bool signaled = true;
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
auto hostAddress = static_cast<uint64_t *>(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset));
for (uint32_t i = 0; i < this->partitionCount; i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {

View File

@@ -261,7 +261,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
event->setKernelForPrintf(kernel);
}
if (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation) {
if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);

View File

@@ -296,14 +296,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->dcFlushSupport // dcFlushEnable
};
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation);
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation);
bool inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(event);
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false);
} else {
dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
dispatchKernelArgs.eventAddress = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset;
dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1;
}
}

View File

@@ -74,8 +74,6 @@ ze_result_t CommandListImp::destroy() {
}
}
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation);
delete this;
return ZE_RESULT_SUCCESS;
}
@@ -226,19 +224,19 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
}
void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderExecutionEnabled);
UNRECOVERABLE_IF(inOrderExecInfo.get());
auto device = this->device->getNEODevice();
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
auto inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
inOrderExecutionEnabled = true;
inOrderExecInfo = std::make_shared<InOrderExecInfo>(*inOrderDependencyCounterAllocation, *device->getMemoryManager(), (this->cmdListType == TYPE_REGULAR));
}
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
@@ -263,4 +261,10 @@ void CommandListImp::addToMappedEventList(Event *event) {
}
}
// Bumps the submission counter stored in the shared in-order execution info.
// When in-order execution is not enabled there is no such info object, so the
// call does nothing.
void CommandListImp::incRegularCmdListSubmissionCounter() {
    if (!isInOrderExecutionEnabled()) {
        return;
    }

    ++inOrderExecInfo->regularCmdListSubmissionCounter;
}
} // namespace L0

View File

@@ -9,7 +9,7 @@
#include "shared/source/os_interface/os_time.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/helpers/in_order_patch_cmds.h"
#include "level_zero/core/source/helpers/in_order_cmd_helpers.h"
#include <memory>
@@ -31,19 +31,17 @@ struct CommandListImp : CommandList {
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
void enableInOrderExecution();
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
bool isInOrderExecutionEnabled() const { return inOrderExecInfo.get(); }
void storeReferenceTsToMappedEvents(bool clear);
void addToMappedEventList(Event *event);
const std::vector<Event *> &peekMappedEventList() { return mappedTsEventList; }
void incRegularCmdListSubmissionCounter() { regularCmdListSubmissionCounter++; }
void incRegularCmdListSubmissionCounter();
virtual void patchInOrderCmds() = 0;
protected:
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
uint64_t regularCmdListSubmissionCounter = 0;
std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
uint64_t inOrderDependencyCounter = 0;
uint32_t inOrderAllocationOffset = 0;
bool inOrderExecutionEnabled = false;
~CommandListImp() override = default;

View File

@@ -335,8 +335,6 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
}
ze_result_t Event::destroy() {
freeInOrderExecAllocation();
delete this;
return ZE_RESULT_SUCCESS;
}
@@ -394,21 +392,15 @@ void Event::setIsCompleted() {
}
void Event::freeInOrderExecAllocation() {
if (inOrderExecDataAllocation) {
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderExecDataAllocation);
inOrderExecDataAllocation = nullptr;
}
inOrderExecInfo.reset();
}
void Event::updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) {
if (this->inOrderExecDataAllocation != &inOrderDependenciesAllocation) {
freeInOrderExecAllocation();
inOrderDependenciesAllocation.incNumOwners();
void Event::updateInOrderExecState(std::shared_ptr<InOrderExecInfo> &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset) {
if (this->inOrderExecInfo.get() != newInOrderExecInfo.get()) {
inOrderExecInfo = newInOrderExecInfo;
}
inOrderExecSignalValue = signalValue;
inOrderExecDataAllocation = &inOrderDependenciesAllocation;
inOrderAllocationOffset = allocationOffset;
}
@@ -432,4 +424,6 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
}
}
// Returns the address of the dependency counter allocation referenced by the
// attached in-order execution info, or nullptr when no info is attached.
NEO::GraphicsAllocation *Event::getInOrderExecDataAllocation() const {
    if (!inOrderExecInfo) {
        return nullptr;
    }

    return &inOrderExecInfo->inOrderDependencyCounterAllocation;
}
} // namespace L0

View File

@@ -42,6 +42,7 @@ struct DriverHandle;
struct DriverHandleImp;
struct Device;
struct Kernel;
struct InOrderExecInfo;
#pragma pack(1)
struct IpcEventPoolData {
@@ -215,10 +216,10 @@ struct Event : _ze_event_handle_t {
void setMetricStreamer(MetricStreamer *metricStreamer) {
this->metricStreamer = metricStreamer;
}
void updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset);
void updateInOrderExecState(std::shared_ptr<InOrderExecInfo> &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset);
bool isInOrderExecEvent() const { return inOrderExecEvent; }
void enableInOrderMode() { this->inOrderExecEvent = true; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const;
uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
@@ -265,7 +266,7 @@ struct Event : _ze_event_handle_t {
Device *device = nullptr;
EventPool *eventPool = nullptr;
Kernel *kernelWithPrintf = nullptr;
NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr;
std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
CommandQueue *latestUsedCmdQueue = nullptr;
uint32_t maxKernelCount = 0;

View File

@@ -152,11 +152,11 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
if (!this->inOrderExecDataAllocation) {
if (!this->inOrderExecInfo.get()) {
return ZE_RESULT_NOT_READY;
}
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
auto hostAddress = static_cast<uint64_t *>(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset));
bool signaled = true;
for (uint32_t i = 0; i < this->getPacketsInUse(); i++) {
@@ -247,8 +247,8 @@ bool EventImp<TagSizeT>::handlePreQueryStatusOperationsAndCheckCompletion() {
csr->downloadAllocation(alloc);
}
if (inOrderExecEvent) {
if (auto &alloc = *this->inOrderExecDataAllocation; alloc.isUsedByOsContext(csr->getOsContext().getContextId())) {
if (inOrderExecInfo) {
if (auto &alloc = inOrderExecInfo->inOrderDependencyCounterAllocation; alloc.isUsedByOsContext(csr->getOsContext().getContextId())) {
csr->downloadAllocation(alloc);
}
}
@@ -385,7 +385,11 @@ ze_result_t EventImp<TagSizeT>::waitForUserFence(uint64_t timeout) {
return ZE_RESULT_SUCCESS;
}
uint64_t waitAddress = castToUint64(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
if (!inOrderExecInfo) {
return ZE_RESULT_NOT_READY;
}
uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset));
if (!csrs[0]->waitUserFence(this->inOrderExecSignalValue, waitAddress, timeout)) {
return ZE_RESULT_NOT_READY;

View File

@@ -11,7 +11,8 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/error_code_helper_l0.cpp
${CMAKE_CURRENT_SOURCE_DIR}/error_code_helper_l0.h
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_l0.cpp
${CMAKE_CURRENT_SOURCE_DIR}/in_order_patch_cmds.h
${CMAKE_CURRENT_SOURCE_DIR}/in_order_cmd_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/in_order_cmd_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_factory_init.inl
${CMAKE_CURRENT_SOURCE_DIR}/l0_populate_factory.h
${CMAKE_CURRENT_SOURCE_DIR}/properties_parser.h

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/helpers/in_order_cmd_helpers.h"
#include "shared/source/memory_manager/memory_manager.h"
#include <cstdint>
#include <vector>
namespace L0 {
// Frees the in-order dependency counter allocation through the memory manager
// reference that was passed to the constructor.
InOrderExecInfo::~InOrderExecInfo() {
memoryManager.freeGraphicsMemory(&inOrderDependencyCounterAllocation);
}
// Binds the counter allocation and the memory manager by reference and records
// whether the owning command list is a regular one. No other work is done here.
// NOTE(review): both references are used again in the destructor, so the memory
// manager is presumably expected to outlive this object - confirm with callers.
InOrderExecInfo::InOrderExecInfo(NEO::GraphicsAllocation &inOrderDependencyCounterAllocation, NEO::MemoryManager &memoryManager, bool isRegularCmdList)
: inOrderDependencyCounterAllocation(inOrderDependencyCounterAllocation), memoryManager(memoryManager), isRegularCmdList(isRegularCmdList) {
}
} // namespace L0

View File

@@ -7,12 +7,32 @@
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/ptr_math.h"
#include <cstdint>
#include <vector>
namespace NEO {
class GraphicsAllocation;
class MemoryManager;
} // namespace NEO
namespace L0 {
// State describing in-order execution of a command list. Command lists and
// events hold it through std::shared_ptr, so several holders can refer to the
// same counter allocation.
struct InOrderExecInfo : public NEO::NonCopyableClass {
// Frees inOrderDependencyCounterAllocation via memoryManager.
~InOrderExecInfo();
// An instance cannot exist without an allocation and a memory manager.
InOrderExecInfo() = delete;
InOrderExecInfo(NEO::GraphicsAllocation &inOrderDependencyCounterAllocation, NEO::MemoryManager &memoryManager, bool isRegularCmdList);
// Allocation holding the in-order dependency counter; released in the destructor.
NEO::GraphicsAllocation &inOrderDependencyCounterAllocation;
// Memory manager used to free the allocation above.
NEO::MemoryManager &memoryManager;
// Incremented by CommandListImp::incRegularCmdListSubmissionCounter(); patchInOrderCmds() multiplies it by the dependency counter.
uint64_t regularCmdListSubmissionCounter = 0;
// Set by the creating command list to (cmdListType == TYPE_REGULAR).
bool isRegularCmdList = false;
};
namespace InOrderPatchCommandTypes {
enum class CmdType {
None,

View File

@@ -78,7 +78,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::initialize;
using BaseClass::inOrderAllocationOffset;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::inOrderExecInfo;
using BaseClass::inOrderPatchCmds;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isQwordInOrderCounter;
@@ -90,7 +90,6 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::patternAllocations;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::regularCmdListSubmissionCounter;
using BaseClass::requiredStreamState;
using BaseClass::requiresQueueUncachedMocs;
using BaseClass::setupTimestampEventForMultiTile;
@@ -172,7 +171,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::hostSynchronize;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::inOrderExecInfo;
using BaseClass::inOrderPatchCmds;
using BaseClass::isBcsSplitNeeded;
using BaseClass::isFlushTaskSubmissionEnabled;

View File

@@ -1769,7 +1769,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
lrrCmd++;
lrrCmd++;
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, cmdList->inOrderDependencyCounterAllocation->getGpuAddress(), 2,
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, cmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), 2,
NEO::CompareOperation::Less, true, cmdList->isQwordInOrderCounter()));
}

View File

@@ -667,7 +667,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
struct MockEvent : public EventImp<uint32_t> {
using EventImp<uint32_t>::inOrderExecEvent;
using EventImp<uint32_t>::maxPacketCount;
using EventImp<uint32_t>::inOrderExecDataAllocation;
using EventImp<uint32_t>::inOrderExecInfo;
using EventImp<uint32_t>::inOrderExecSignalValue;
using EventImp<uint32_t>::inOrderAllocationOffset;
using EventImp<uint32_t>::csrs;
@@ -909,11 +909,13 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallW
auto eventPool = createEvents<FamilyType>(2, false);
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(2));
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
events[0]->inOrderAllocationOffset = 123;
auto hostAddress = castToUint64(ptrOffset(events[0]->inOrderExecDataAllocation->getUnderlyingBuffer(), events[0]->inOrderAllocationOffset));
auto hostAddress = castToUint64(ptrOffset(events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), events[0]->inOrderAllocationOffset));
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
@@ -954,18 +956,18 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
EXPECT_TRUE(events[0]->inOrderExecEvent);
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation);
EXPECT_EQ(&events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation, &immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
events[0]->inOrderAllocationOffset = 123;
events[0]->reset();
EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr);
EXPECT_EQ(events[0]->inOrderExecInfo, nullptr);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
}
@@ -979,7 +981,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenDontSetIn
EXPECT_FALSE(events[0]->inOrderExecEvent);
EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr);
EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
}
@@ -1012,7 +1014,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation());
}
@@ -1113,7 +1115,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingStoreDataImmThenP
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*itor);
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
auto userInterruptCmd = genCmdCast<MI_USER_INTERRUPT *>(*(++itor));
ASSERT_NE(nullptr, userInterruptCmd);
@@ -1218,42 +1220,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromAfterReset
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, retValue);
}
HWTEST2_F(InOrderCmdListTests, givenMultipleAllocationOwnerWhenUsingEventsThenSetCorrectOwnersCount, IsAtLeastSkl) {
    // Follows the owner count of the first immediate command list's in-order
    // counter allocation while events are signaled from that list, signaled
    // from a second list, and finally reset.
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto firstCmdList = createImmCmdList<gfxCoreFamily>();
    auto secondCmdList = createImmCmdList<gfxCoreFamily>();

    // eventPool is not referenced again; it is held in a local for the duration of the test.
    auto eventPool = createEvents<FamilyType>(2, false);
    auto firstEvent = events[0]->toHandle();
    auto secondEvent = events[1]->toHandle();

    auto counterAllocation = firstCmdList->inOrderDependencyCounterAllocation;

    // Probe for the owner count: increment, then fetch-and-decrement, and compare
    // the fetched value minus one against the expectation.
    // NOTE(review): presumably fetchDecNumOwners() returns the pre-decrement value,
    // so the probe leaves the count untouched - confirm against GraphicsAllocation.
    auto expectOwners = [&counterAllocation](uint32_t expectedValue) {
        counterAllocation->incNumOwners();
        auto observed = counterAllocation->fetchDecNumOwners();
        EXPECT_EQ(expectedValue, observed - 1);
    };

    // Appends one kernel launch on the given command list, signaling the given event.
    auto launchWithSignal = [&](auto &cmdList, auto signalEvent) {
        cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, signalEvent, 0, nullptr, launchParams, false);
    };

    // Before any event is used.
    expectOwners(1);

    // First use of events[0] on the first list.
    launchWithSignal(firstCmdList, firstEvent);
    expectOwners(2);

    // Re-using the same event on the same list must not change the count.
    launchWithSignal(firstCmdList, firstEvent);
    expectOwners(2);

    // A second event on the first list.
    launchWithSignal(firstCmdList, secondEvent);
    expectOwners(3);

    // events[0] is now signaled from the second list; the first list's count drops.
    launchWithSignal(secondCmdList, firstEvent);
    expectOwners(2);

    // Resetting events[1] drops the count again.
    events[1]->reset();
    expectOwners(1);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSemaphoreOnlyForExternalEvent, IsAtLeastXeHpCore) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
@@ -1301,7 +1267,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem
ASSERT_NE(nullptr, semaphoreCmd);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset2, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset2, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation());
itor++;
@@ -1360,11 +1326,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentC
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation]);
immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation]);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));
@@ -1376,8 +1342,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentC
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_NE(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress());
EXPECT_EQ(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_NE(immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress());
EXPECT_EQ(immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation());
}
@@ -1400,8 +1366,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEvent
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastXeHpCore) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation);
EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType());
EXPECT_NE(nullptr, immCmdList->inOrderExecInfo.get());
EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getAllocationType());
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
@@ -1410,11 +1376,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastXeHpCore) {
@@ -1476,7 +1442,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(1u, postSync.getImmediateData());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress());
}
auto offset = cmdStream->getUsed();
@@ -1498,10 +1464,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(2u, postSync.getImmediateData());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress());
}
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), counterOffset));
*hostAddress = 1;
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
@@ -1563,7 +1529,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(++semaphoreCmd);
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
}
@@ -1678,7 +1644,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
}
@@ -1778,7 +1744,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenCl
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(++semaphoreCmd);
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
}
@@ -1874,7 +1840,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
uint64_t expectedAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + immCmdList->inOrderAllocationOffset;
uint64_t expectedAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + immCmdList->inOrderAllocationOffset;
EXPECT_EQ(expectedAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -1922,7 +1888,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
uint64_t expectedAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + immCmdList->inOrderAllocationOffset;
uint64_t expectedAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + immCmdList->inOrderAllocationOffset;
EXPECT_EQ(expectedAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -1947,7 +1913,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT
immCmdList->appendSignalEvent(events[0]->toHandle());
uint64_t inOrderSyncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t inOrderSyncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
@@ -1995,7 +1961,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingNonKernelAppendThe
auto eventPool = createEvents<FamilyType>(1, true);
uint64_t inOrderSyncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t inOrderSyncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
uint8_t ptr[64] = {};
@@ -2087,7 +2053,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
uint8_t ptr[64] = {};
uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t inOrderSyncVa = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
@@ -2228,7 +2194,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
ASSERT_NE(nullptr, semaphoreCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
auto walkerItor = find<WALKER_TYPE *>(semaphoreItor, cmdList.end());
EXPECT_NE(cmdList.end(), walkerItor);
@@ -2299,7 +2265,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2328,7 +2294,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingComputeCopyThenDon
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(walkerItor, cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
@@ -2364,7 +2330,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingFillThenSi
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2415,7 +2381,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOu
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2456,7 +2422,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndWi
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2492,7 +2458,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithoutSplitTh
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(1u, postSync.getImmediateData());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(walkerItor, cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
@@ -2529,7 +2495,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2568,14 +2534,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
ASSERT_NE(nullptr, semaphoreCmd);
EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(semaphoreItor, cmdList.end());
ASSERT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(3u, sdiCmd->getDataDword0());
}
@@ -2614,7 +2580,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t syncVa = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -2637,7 +2603,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
auto eventHandle = events[0]->toHandle();
uint64_t baseGpuVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
uint64_t baseGpuVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false);
@@ -2651,7 +2617,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), postSync.getImmediateData());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(walkerItor, cmdList.end());
@@ -2668,7 +2634,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(expectedCounter, postSync.getImmediateData());
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
} else {
ASSERT_NE(cmdList.end(), semaphoreItor);
@@ -2724,7 +2690,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThe
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList2->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
@@ -2762,7 +2728,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithW
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList2->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
@@ -2859,7 +2825,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
@@ -2896,7 +2862,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
@@ -2914,7 +2880,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), counterOffset));
*hostAddress = 0;
const uint32_t failCounter = 3;
@@ -2988,7 +2954,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
auto eventHandle = events[0]->toHandle();
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer());
*hostAddress = 0;
const uint32_t failCounter = 3;
@@ -3043,7 +3009,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer());
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@@ -3098,7 +3064,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
@@ -3148,7 +3114,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
@@ -3198,7 +3164,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdList.begin());
ASSERT_NE(nullptr, semaphoreCmd);
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(gpuAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
@@ -3225,7 +3191,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSy
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*cmdList.begin());
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_TRUE(sdiCmd->getWorkloadPartitionIdOffsetEnable());
@@ -3240,7 +3206,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
auto hostAddress0 = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress0 = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer());
auto hostAddress1 = ptrOffset(hostAddress0, sizeof(uint64_t));
*hostAddress0 = 0;
@@ -3521,7 +3487,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -3561,7 +3527,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -3791,7 +3757,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(1u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
@@ -3817,14 +3783,14 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(2u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
}
regularCmdList->inOrderAllocationOffset = 123;
auto hostAddr = static_cast<uint64_t *>(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddr = static_cast<uint64_t *>(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer());
*hostAddr = 0x1234;
regularCmdList->reset();
@@ -3849,7 +3815,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
size_t offset = cmdStream->getUsed();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
EXPECT_NE(nullptr, regularCmdList->inOrderExecInfo.get());
constexpr size_t size = 128 * sizeof(uint32_t);
auto data = allocHostMem(size);
@@ -3920,7 +3886,7 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
@@ -3953,7 +3919,7 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
auto gpuAddress = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());

View File

@@ -1137,28 +1137,32 @@ HWTEST2_F(ExecuteCommandListTests, givenRegularCmdListWhenExecutionThenIncSubmis
{
auto computeCmdList = makeZeUniquePtr<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
computeCmdList->initialize(device, NEO::EngineGroupType::Compute, 0u);
computeCmdList->enableInOrderExecution();
auto commandListHandle = computeCmdList->toHandle();
computeCmdList->close();
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(1u, computeCmdList->regularCmdListSubmissionCounter);
EXPECT_EQ(1u, computeCmdList->inOrderExecInfo->regularCmdListSubmissionCounter);
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(2u, computeCmdList->regularCmdListSubmissionCounter);
EXPECT_EQ(2u, computeCmdList->inOrderExecInfo->regularCmdListSubmissionCounter);
}
{
auto copyCmdList = makeZeUniquePtr<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
copyCmdList->initialize(device, NEO::EngineGroupType::Copy, 0u);
copyCmdList->enableInOrderExecution();
auto commandListHandle = copyCmdList->toHandle();
copyCmdList->close();
commandQueue->isCopyOnlyCommandQueue = true;
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(1u, copyCmdList->regularCmdListSubmissionCounter);
EXPECT_EQ(1u, copyCmdList->inOrderExecInfo->regularCmdListSubmissionCounter);
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(2u, copyCmdList->regularCmdListSubmissionCounter);
EXPECT_EQ(2u, copyCmdList->inOrderExecInfo->regularCmdListSubmissionCounter);
}
}

View File

@@ -3195,23 +3195,25 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
uint64_t storage[2] = {1, 1};
NEO::MockGraphicsAllocation syncAllocation(&storage, sizeof(storage));
auto syncAllocation = new NEO::MockGraphicsAllocation(&storage, sizeof(storage));
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(*syncAllocation, *neoDevice->getMemoryManager(), false);
event->inOrderExecEvent = true;
event->updateInOrderExecState(syncAllocation, 1, 0);
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
auto result = event->hostSynchronize(timeout);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, downloadAllocationTrack[&syncAllocation]);
EXPECT_EQ(0u, downloadAllocationTrack[syncAllocation]);
EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount);
auto event2 = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
event2->inOrderExecEvent = true;
event2->updateInOrderExecState(syncAllocation, 1, 0);
syncAllocation.updateTaskCount(0u, ultCsr->getOsContext().getContextId());
event2->updateInOrderExecState(inOrderExecInfo, 1, 0);
syncAllocation->updateTaskCount(0u, ultCsr->getOsContext().getContextId());
ultCsr->downloadAllocationsCalledCount = 0;
eventAddress = static_cast<TagAddressType *>(event->getHostAddress());
*eventAddress = Event::STATE_SIGNALED;
@@ -3219,7 +3221,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
result = event2->hostSynchronize(timeout);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, downloadAllocationTrack[&syncAllocation]);
EXPECT_NE(0u, downloadAllocationTrack[syncAllocation]);
EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount);
}