diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 63265145f6..2f5a7391e0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -292,7 +292,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } - if (event->useContextEndOffset()) { + if (event->isUsingContextEndOffset()) { baseAddr += event->getContextEndOffset(); } @@ -1641,6 +1641,9 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); uint64_t baseAddr = event->getGpuAddress(this->device); + if (event->isUsingContextEndOffset()) { + baseAddr += event->getContextEndOffset(); + } const auto &hwInfo = this->device->getHwInfo(); if (isCopyOnly()) { @@ -1654,8 +1657,6 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ if (this->partitionCount > 1) { args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); - event->setPartitionedEvent(true); - baseAddr += event->getContextEndOffset(); } NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), @@ -1797,11 +1798,8 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } size_t eventSignalOffset = 0; - if (this->partitionCount > 1) { - event->setPartitionedEvent(true); - event->setPacketsInUse(this->partitionCount); - } - if (event->useContextEndOffset()) { + + if (event->isUsingContextEndOffset()) { eventSignalOffset = event->getContextEndOffset(); } @@ -1815,7 +1813,10 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han NEO::PipeControlArgs args; bool applyScope = event->signalScope; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(applyScope, hwInfo); - args.workloadPartitionOffset = event->isPartitionedEvent(); + if (this->partitionCount > 1) { + event->setPacketsInUse(this->partitionCount); + args.workloadPartitionOffset = true; + } if (applyScope || event->isEventTimestampFlagSet()) { NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), @@ -1892,7 +1893,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu gpuAddr = event->getGpuAddress(this->device); uint32_t packetsToWait = event->getPacketsInUse(); - if (event->useContextEndOffset()) { + if (event->isUsingContextEndOffset()) { gpuAddr += event->getContextEndOffset(); } for (uint32_t i = 0u; i < packetsToWait; i++) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 357956fff7..e3d09d7fee 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -121,21 +121,22 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z updateStreamProperties(*kernel, false, isCooperative); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - 0, //eventAddress - neoDevice, //device - kernel, //dispatchInterface - reinterpret_cast(pThreadGroupDimensions), //pThreadGroupDimensions - commandListPreemptionMode, //preemptionMode - 0, //partitionCount - isIndirect, //isIndirect - isPredicate, //isPredicate - false, //isTimestampEvent - false, //L3FlushEnable - this->containsStatelessUncachedResource, //requiresUncachedMocs - false, //useGlobalAtomics - internalUsage, //isInternal - isCooperative //isCooperative + 0, // eventAddress + neoDevice, // device + kernel, // dispatchInterface + reinterpret_cast(pThreadGroupDimensions), // pThreadGroupDimensions + commandListPreemptionMode, // preemptionMode + 0, // partitionCount + isIndirect, // isIndirect + isPredicate, // isPredicate + false, // isTimestampEvent + false, // L3FlushEnable + this->containsStatelessUncachedResource, // requiresUncachedMocs + false, // useGlobalAtomics + internalUsage, // isInternal + isCooperative // isCooperative }; + NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index d8f137f10b..18a1706cb3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -21,6 +21,7 @@ #include "shared/source/xe_hp_core/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/source/module/module.h" @@ -94,7 +95,7 @@ void programEventL3Flush(ze_event_handle_t hEvent, auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize()) : event->getSinglePacketSize(); uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset; - if (event->useContextEndOffset()) { + if (event->isUsingContextEndOffset()) { eventAddress += event->getContextEndOffset(); } @@ -163,7 +164,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z eventAlloc = &event->getAllocation(this->device); commandContainer.addToResidencyContainer(eventAlloc); L3FlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); - isTimestampEvent = event->isEventTimestampFlagSet(); + isTimestampEvent = event->isUsingContextEndOffset(); eventAddress = event->getPacketAddress(this->device); } @@ -214,20 +215,20 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - eventAddress, //eventAddress - neoDevice, //device - kernel, //dispatchInterface - reinterpret_cast(pThreadGroupDimensions), //pThreadGroupDimensions - commandListPreemptionMode, //preemptionMode - this->partitionCount, //partitionCount - isIndirect, //isIndirect - isPredicate, //isPredicate - isTimestampEvent, //isTimestampEvent - L3FlushEnable, //L3FlushEnable - this->containsStatelessUncachedResource, //requiresUncachedMocs - kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics - internalUsage, //isInternal - isCooperative //isCooperative + eventAddress, // eventAddress + neoDevice, // device + kernel, // dispatchInterface + reinterpret_cast(pThreadGroupDimensions), // pThreadGroupDimensions + commandListPreemptionMode, // preemptionMode + this->partitionCount, // partitionCount + isIndirect, // isIndirect + isPredicate, // isPredicate + isTimestampEvent, // isTimestampEvent + L3FlushEnable, // L3FlushEnable + this->containsStatelessUncachedResource, // requiresUncachedMocs + kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics + internalUsage, // isInternal + isCooperative // isCooperative }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; @@ -236,7 +237,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z auto event = Event::fromHandle(hEvent); if (partitionCount > 1) { event->setPacketsInUse(partitionCount); - event->setPartitionedEvent(true); } if (L3FlushEnable) { programEventL3Flush(hEvent, this->device, partitionCount, commandContainer); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 6829c7613b..651a9d3cd2 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -88,14 +88,11 @@ struct Event : _ze_event_handle_t { bool isEventTimestampFlagSet() const { return isTimestampEvent; } - void setPartitionedEvent(bool partitionedEvent) { - this->partitionedEvent = partitionedEvent; + void setUsingContextEndOffset(bool usingContextEndOffset) { + this->usingContextEndOffset = usingContextEndOffset; } - bool isPartitionedEvent() const { - return partitionedEvent; - } - bool useContextEndOffset() const { - return isTimestampEvent || partitionedEvent; + bool isUsingContextEndOffset() const { + return isTimestampEvent || usingContextEndOffset; } uint64_t globalStartTS; @@ -123,8 +120,9 @@ struct Event : _ze_event_handle_t { size_t globalEndOffset = 0u; size_t timestampSizeInDw = 0u; size_t singlePacketSize = 0u; + size_t eventPoolOffset = 0u; bool isTimestampEvent = false; - bool partitionedEvent = false; + bool usingContextEndOffset = false; }; template diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 25e7398e79..74c1bd230c 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -8,6 +8,7 @@ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace L0 { template @@ -18,15 +19,18 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * if (eventPool->isEventPoolTimestampFlagSet()) { event->setEventTimestampFlag(true); } + auto neoDevice = device->getNEODevice(); event->kernelEventCompletionData = std::make_unique[]>(EventPacketsCount::maxKernelSplit); - auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); + auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex()); uint64_t baseHostAddr = reinterpret_cast(alloc->getUnderlyingBuffer()); - event->hostAddress = reinterpret_cast(baseHostAddr + (desc->index * eventPool->getEventSize())); + event->eventPoolOffset = desc->index * eventPool->getEventSize(); + event->hostAddress = reinterpret_cast(baseHostAddr + event->eventPoolOffset); event->signalScope = desc->signal; event->waitScope = desc->wait; - event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + event->csr = neoDevice->getDefaultEngine().commandStreamReceiver; + event->setUsingContextEndOffset(L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()); EventPoolImp *EventPoolImp = static_cast(eventPool); // do not reset even if it has been imported, since event pool @@ -41,7 +45,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * template uint64_t EventImp::getGpuAddress(Device *device) { auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); - return (alloc->getGpuAddress() + (index * eventPool->getEventSize())); + return (alloc->getGpuAddress() + this->eventPoolOffset); } template @@ -120,7 +124,7 @@ ze_result_t EventImp::queryStatusNonTimestamp() { for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - void const *queryAddress = partitionedEvent + void const *queryAddress = usingContextEndOffset ? kernelEventCompletionData[i].getContextEndAddress(packetId) : kernelEventCompletionData[i].getContextStartAddress(packetId); bool ready = NEO::WaitUtils::waitFunctionWithPredicate( @@ -138,9 +142,11 @@ ze_result_t EventImp::queryStatusNonTimestamp() { template ze_result_t EventImp::queryStatus() { - TagSizeT *hostAddr = static_cast(hostAddress); - if (metricStreamer != nullptr) { + TagSizeT *hostAddr = static_cast(hostAddress); + if (usingContextEndOffset) { + hostAddr = ptrOffset(hostAddr, this->getContextEndOffset()); + } *hostAddr = metricStreamer->getNotificationState(); } this->csr->downloadAllocations(); @@ -185,21 +191,21 @@ ze_result_t EventImp::hostEventSetValueTimestamps(TagSizeT eventVal) { template ze_result_t EventImp::hostEventSetValue(TagSizeT eventVal) { + UNRECOVERABLE_IF(hostAddress == nullptr); + if (isEventTimestampFlagSet()) { return hostEventSetValueTimestamps(eventVal); } auto packetHostAddr = hostAddress; - UNRECOVERABLE_IF(packetHostAddr == nullptr); + if (usingContextEndOffset) { + packetHostAddr = ptrOffset(packetHostAddr, contextEndOffset); + } for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t j = 0; j < packetsToSet; j++) { memcpy_s(packetHostAddr, sizeof(TagSizeT), static_cast(&eventVal), sizeof(TagSizeT)); - if (isPartitionedEvent()) { - void *packetContextEndAddr = ptrOffset(packetHostAddr, contextEndOffset); - memcpy_s(packetContextEndAddr, sizeof(TagSizeT), static_cast(&eventVal), sizeof(TagSizeT)); - } packetHostAddr = ptrOffset(packetHostAddr, singlePacketSize); } } @@ -268,10 +274,8 @@ ze_result_t EventImp::reset() { kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); } } - partitionedEvent = true; hostEventSetValue(Event::STATE_INITIAL); resetPackets(); - partitionedEvent = false; return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/hw_helpers/l0_hw_helper.h b/level_zero/core/source/hw_helpers/l0_hw_helper.h index d7fc6559b2..8986561021 100644 --- a/level_zero/core/source/hw_helpers/l0_hw_helper.h +++ b/level_zero/core/source/hw_helpers/l0_hw_helper.h @@ -39,6 +39,7 @@ class L0HwHelper { virtual void getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const = 0; virtual std::vector getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const = 0; + virtual bool multiTileCapablePlatform() const = 0; protected: L0HwHelper() = default; @@ -62,6 +63,7 @@ class L0HwHelperHw : public L0HwHelper { bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const override; void getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const override; std::vector getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const override; + bool multiTileCapablePlatform() const override; }; } // namespace L0 diff --git a/level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl b/level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl index c837185547..f41519a9b1 100644 --- a/level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl +++ b/level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl @@ -13,4 +13,9 @@ template void L0HwHelperHw::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const { } +template +bool L0HwHelperHw::multiTileCapablePlatform() const { + return false; +} + } // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp b/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp index 6088a53118..8f788f6ae3 100644 --- a/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp +++ b/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -27,6 +27,11 @@ bool L0HwHelperHw::isResumeWARequired() { return true; } +template <> +bool L0HwHelperHw::multiTileCapablePlatform() const { + return true; +} + // clang-format off #include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl" // clang-format on diff --git a/level_zero/core/source/xe_hpc_core/l0_hw_helper_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/l0_hw_helper_xe_hpc_core.cpp index ffb0ae4b38..afce869e38 100644 --- a/level_zero/core/source/xe_hpc_core/l0_hw_helper_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/l0_hw_helper_xe_hpc_core.cpp @@ -29,6 +29,11 @@ bool L0HwHelperHw::isIpSamplingSupported(const NEO::HardwareInfo &hwInfo return NEO::PVC::isXt(hwInfo); } +template <> +bool L0HwHelperHw::multiTileCapablePlatform() const { + return true; +} + template class L0HwHelperHw; } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 6e346fa388..0e56ccc5b8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -40,18 +40,22 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPost auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); - bool postSyncFound = false; + auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + uint32_t postSyncFound = 0; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL); - auto gpuAddress = event->getGpuAddress(device); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - postSyncFound = true; + postSyncFound++; } } - ASSERT_TRUE(postSyncFound); + EXPECT_EQ(1u, postSyncFound); } HWTEST_F(CommandListAppendEventReset, whenResetEventIsAppendedAndNoSpaceIsAvailableThenNextCommandBufferIsCreated) { @@ -202,6 +206,9 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip commandList->appendEventReset(event->toHandle()); auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -246,7 +253,7 @@ HWTEST2_F(CommandListAppendEventReset, constexpr uint32_t packets = 2u; event->setPacketsInUse(packets); event->setEventTimestampFlag(false); - event->setPartitionedEvent(true); + event->setUsingContextEndOffset(true); event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 42c401590b..ca97cdc17d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1081,7 +1081,14 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); - EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & device->getHwInfo().capabilityTable.gpuAddressSpace, event->getGpuAddress(device) & device->getHwInfo().capabilityTable.gpuAddressSpace); + auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; + + uint64_t gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 343d98f0fd..3045f8e8b8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -37,10 +37,13 @@ HWTEST_F(CommandListAppendSignalEvent, WhenAppendingSignalEventWithoutScopeThenM cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto baseAddr = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + baseAddr += event->getContextEndOffset(); + } auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); + ASSERT_NE(itor, cmdList.end()); auto cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getAddress(), baseAddr); + EXPECT_EQ(baseAddr, cmd->getAddress()); } HWTEST_F(CommandListAppendSignalEvent, givenCmdlistWhenAppendingSignalEventThenEventPoolGraphicsAllocationIsAddedToResidencyContainer) { @@ -200,6 +203,7 @@ HWTEST2_F(CommandListAppendSignalEvent, constexpr uint32_t packets = 2u; event->setEventTimestampFlag(false); + event->setUsingContextEndOffset(true); event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; @@ -253,6 +257,7 @@ HWTEST2_F(CommandListAppendSignalEvent, constexpr uint32_t packets = 2u; event->setEventTimestampFlag(false); + event->setUsingContextEndOffset(true); event->signalScope = 0; commandList->partitionCount = packets; @@ -316,7 +321,10 @@ HWTEST2_F(CommandListAppendSignalEvent, commandList->appendSignalEventPostWalker(event->toHandle()); EXPECT_EQ(packets, event->getPacketsInUse()); - auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); + auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } auto &hwInfo = device->getNEODevice()->getHardwareInfo(); size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 1819f7beb7..570642b23a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -47,7 +47,12 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; - EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); + uint64_t gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } @@ -73,15 +78,19 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe auto itor = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, itor.size()); + auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; + + uint64_t gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + for (int i = 0; i < 2; i++) { auto cmd = genCmdCast(*itor[i]); EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); - - auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; - - EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); + EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } @@ -266,7 +275,12 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenCommandListWhenAppendWriteGlobalTim auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; - EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); + uint64_t gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); @@ -322,6 +336,9 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai EXPECT_NE(oldCommandBuffer, newCommandBuffer); auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, @@ -369,7 +386,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent, size_t expectedSize = commandList->partitionCount * sizeof(MI_SEMAPHORE_WAIT); event->setPacketsInUse(commandList->partitionCount); - event->setPartitionedEvent(true); + event->setUsingContextEndOffset(true); ze_event_handle_t eventHandle = event->toHandle(); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 8de8bf8e12..43eb6685c8 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -16,6 +16,7 @@ #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" @@ -552,6 +553,12 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); + if (L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()) { + EXPECT_TRUE(event->isUsingContextEndOffset()); + } else { + EXPECT_FALSE(event->isUsingContextEndOffset()); + } + result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, result); @@ -671,23 +678,38 @@ TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithNonZeroTimeoutAn EXPECT_EQ(ZE_RESULT_NOT_READY, result); } -TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroAndStateSignaledHostSynchronizeReturnsSuccess) { +TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroWhenStateSignaledThenHostSynchronizeReturnsSuccess) { uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; + + event->setUsingContextEndOffset(false); ze_result_t result = event->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } -TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutNonZeroAndStateSignaledHostSynchronizeReturnsSuccess) { +TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutNonZeroWhenStateSignaledThenHostSynchronizeReturnsSuccess) { uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; + + event->setUsingContextEndOffset(false); ze_result_t result = event->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } +TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroWhenOffsetEventStateSignaledThenHostSynchronizeReturnsSuccess) { + uint32_t *hostAddr = static_cast(event->getHostAddress()); + hostAddr = ptrOffset(hostAddr, event->getContextEndOffset()); + *hostAddr = Event::STATE_SIGNALED; + + event->setUsingContextEndOffset(true); + ze_result_t result = event->hostSynchronize(0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { constexpr uint32_t packetsInUse = 2; event->setPacketsInUse(packetsInUse); + event->setUsingContextEndOffset(false); const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextStartOffset(); @@ -720,10 +742,10 @@ TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForNonTimestampEvent EXPECT_EQ(ZE_RESULT_SUCCESS, result); } -TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForPartitionedNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { +TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForOffsetedNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { constexpr uint32_t packetsInUse = 2; event->setPacketsInUse(packetsInUse); - event->setPartitionedEvent(true); + event->setUsingContextEndOffset(true); const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextEndOffset(); @@ -820,11 +842,13 @@ TEST_F(EventPoolIPCEventResetTests, whenOpeningIpcHandleForEventPoolCreateWithIp EXPECT_NE(nullptr, event0); uint32_t *hostAddr = static_cast(event0->getHostAddress()); + if (event0->isUsingContextEndOffset()) { + hostAddr = ptrOffset(hostAddr, event0->getContextEndOffset()); + } EXPECT_EQ(*hostAddr, Event::STATE_INITIAL); // change state event0->hostSignal(); - hostAddr = static_cast(event0->getHostAddress()); EXPECT_EQ(*hostAddr, Event::STATE_SIGNALED); // create an event from the pool with the same index as event0, but this time, since isImportedIpcPool is true, no reset should happen @@ -836,6 +860,9 @@ TEST_F(EventPoolIPCEventResetTests, whenOpeningIpcHandleForEventPoolCreateWithIp EXPECT_NE(nullptr, event1); uint32_t *hostAddr1 = static_cast(event1->getHostAddress()); + if (event1->isUsingContextEndOffset()) { + hostAddr1 = ptrOffset(hostAddr1, event1->getContextEndOffset()); + } EXPECT_EQ(*hostAddr1, Event::STATE_SIGNALED); // create another event from the pool with the same index, but this time, since isImportedIpcPool is false, reset should happen @@ -847,6 +874,9 @@ TEST_F(EventPoolIPCEventResetTests, whenOpeningIpcHandleForEventPoolCreateWithIp EXPECT_NE(nullptr, event2); uint32_t *hostAddr2 = static_cast(event2->getHostAddress()); + if (event2->isUsingContextEndOffset()) { + hostAddr2 = ptrOffset(hostAddr2, event2->getContextEndOffset()); + } EXPECT_EQ(*hostAddr2, Event::STATE_INITIAL); } @@ -1499,11 +1529,10 @@ TEST_F(EventTests, GivenResetWhenQueryingStatusThenNotReadyIsReturned) { auto result = event->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - event->setPartitionedEvent(true); + event->setUsingContextEndOffset(true); result = event->reset(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_FALSE(event->isPartitionedEvent()); EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); @@ -1550,6 +1579,9 @@ TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAl ASSERT_NE(event, nullptr); uint32_t *hostAddr = static_cast(event->getHostAddress()); + if (event->isUsingContextEndOffset()) { + hostAddr = ptrOffset(hostAddr, event->getContextEndOffset()); + } EXPECT_EQ(*hostAddr, Event::STATE_INITIAL); EXPECT_EQ(1u, event->getPacketsInUse()); @@ -1563,7 +1595,7 @@ TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAl } } -TEST_F(EventTests, givenPartitionedEventUseMultiplePacketsWhenHostSignalThenExpectAllPacketsAreSignaled) { +TEST_F(EventTests, givenEventUseMultiplePacketsWhenHostSignalThenExpectAllPacketsAreSignaled) { eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; @@ -1572,7 +1604,12 @@ TEST_F(EventTests, givenPartitionedEventUseMultiplePacketsWhenHostSignalThenExpe device))); ASSERT_NE(event, nullptr); - uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), event->getContextEndOffset())); + size_t eventOffset = 0; + if (event->isUsingContextEndOffset()) { + eventOffset = event->getContextEndOffset(); + } + + uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventOffset)); EXPECT_EQ(Event::STATE_INITIAL, *hostAddr); EXPECT_EQ(1u, event->getPacketsInUse()); @@ -1580,7 +1617,6 @@ TEST_F(EventTests, givenPartitionedEventUseMultiplePacketsWhenHostSignalThenExpe constexpr uint32_t packetsUsed = 4u; event->setPacketsInUse(packetsUsed); event->setEventTimestampFlag(false); - event->setPartitionedEvent(true); event->hostSignal(); for (uint32_t i = 0; i < packetsUsed; i++) { diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp index c0ce16348b..b16d4b9424 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp @@ -19,6 +19,7 @@ namespace ult { using L0HwHelperTest = ::testing::Test; using PlatformsWithWa = IsWithinGfxCore; +using NonMultiTilePlatforms = IsWithinGfxCore; HWTEST2_F(L0HwHelperTest, givenResumeWANotNeededThenFalseIsReturned, IsAtMostGen11) { auto &l0HwHelper = L0::L0HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily); @@ -560,5 +561,9 @@ HWTEST2_F(L0HwHelperFusedEuTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWh } } +HWTEST2_F(L0HwHelperTest, GivenNonMultiTilePlatformsWhenCheckingL0HelperForMultiTileCapablePlatformThenReturnFalse, NonMultiTilePlatforms) { + EXPECT_FALSE(L0::L0HwHelperHw::get().multiTileCapablePlatform()); +} + } // namespace ult -} // namespace L0 \ No newline at end of file +} // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp index bd9cdf2fcd..1d85aea867 100644 --- a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp @@ -739,7 +739,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenEventWhenInvokingAppendLaunchKerne for (auto it : itorPS) { auto cmd = genCmdCast(*it); auto &postSync = cmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); EXPECT_EQ(gpuAddress, postSync.getDestinationAddress()); postSyncFound = true; } diff --git a/level_zero/core/test/unit_tests/xe_hp_core/test_l0_hw_helper_xe_hp_core.cpp b/level_zero/core/test/unit_tests/xe_hp_core/test_l0_hw_helper_xe_hp_core.cpp index 9fe51dfbea..65a147fce3 100644 --- a/level_zero/core/test/unit_tests/xe_hp_core/test_l0_hw_helper_xe_hp_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hp_core/test_l0_hw_helper_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/test/common/test_macros/test.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { @@ -18,5 +19,11 @@ HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhe HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); +using L0HwHelperTestXeHp = ::testing::Test; + +XEHPTEST_F(L0HwHelperTestXeHp, GivenXeHpWhenCheckingL0HelperForMultiTileCapablePlatformThenReturnTrue) { + EXPECT_TRUE(L0::L0HwHelperHw::get().multiTileCapablePlatform()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/CMakeLists.txt b/level_zero/core/test/unit_tests/xe_hpc_core/CMakeLists.txt index 066a8f1ad5..049e270f67 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/CMakeLists.txt +++ b/level_zero/core/test/unit_tests/xe_hpc_core/CMakeLists.txt @@ -11,6 +11,7 @@ if(TESTS_XE_HPC_CORE) ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe_hpc_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_hw_helper_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe_hpc_core.cpp ) add_subdirectories() diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_hw_helper_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_hw_helper_xe_hpc_core.cpp new file mode 100644 index 0000000000..d002abd98f --- /dev/null +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_hw_helper_xe_hpc_core.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/test_macros/test.h" + +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" + +namespace L0 { +namespace ult { + +using L0HwHelperTestXeHpc = ::testing::Test; + +XE_HPC_CORETEST_F(L0HwHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForMultiTileCapablePlatformThenReturnTrue) { + EXPECT_TRUE(L0::L0HwHelperHw::get().multiTileCapablePlatform()); +} + +} // namespace ult +} // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp index 354f40b8ca..ad3a3c11b1 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp @@ -10,11 +10,13 @@ #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" @@ -377,5 +379,73 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } + +using CommandListAppendLaunchKernelXeHpgCore = Test; +HWTEST2_F(CommandListAppendLaunchKernelXeHpgCore, givenEventWhenAppendKernelIsCalledThenImmediateDataPostSyncIsAdded, IsXeHpgCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + Mock<::L0::Kernel> kernel; + auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = pMockModule.get(); + + kernel.setGroupSize(1, 1, 1); + ze_group_count_t groupCount{8, 1, 1}; + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::CooperativeCompute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + EXPECT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore), + usedSpaceAfter - usedSpaceBefore)); + + auto gpuAddress = event->getGpuAddress(device); + + auto itorWalker = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorWalker); + auto cmdWalker = genCmdCast(*itorWalker); + auto &postSync = cmdWalker->getPostSync(); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(gpuAddress, postSync.getDestinationAddress()); + + gpuAddress += event->getSinglePacketSize(); + auto itorPC = findAll(itorWalker, cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + uint32_t postSyncCount = 0u; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + postSyncCount++; + } + } + EXPECT_EQ(1u, postSyncCount); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_hw_helper_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_hw_helper_xe_hpg_core.cpp index 335adc1e9d..2eb7ab1d77 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_hw_helper_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_hw_helper_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/test/common/test_macros/test.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { @@ -18,5 +19,11 @@ HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhe HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); +using L0HwHelperTestXeHpg = ::testing::Test; + +XE_HPG_CORETEST_F(L0HwHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForMultiTileCapablePlatformThenReturnFalse) { + EXPECT_FALSE(L0::L0HwHelperHw::get().multiTileCapablePlatform()); +} + } // namespace ult } // namespace L0 diff --git a/shared/test/common/fixtures/command_container_fixture.h b/shared/test/common/fixtures/command_container_fixture.h index f7e6519e3d..65680d24de 100644 --- a/shared/test/common/fixtures/command_container_fixture.h +++ b/shared/test/common/fixtures/command_container_fixture.h @@ -38,20 +38,20 @@ class CommandEncodeStatesFixture : public DeviceFixture { const void *pThreadGroupDimensions, bool requiresUncachedMocs) { EncodeDispatchKernelArgs args{ - 0, //eventAddress - device, //device - dispatchInterface, //dispatchInterface - pThreadGroupDimensions, //pThreadGroupDimensions - PreemptionMode::Disabled, //preemptionMode - 1, //partitionCount - false, //isIndirect - false, //isPredicate - false, //isTimestampEvent - false, //L3FlushEnable - requiresUncachedMocs, //requiresUncachedMocs - false, //useGlobalAtomics - false, //isInternal - false //isCooperative + 0, // eventAddress + device, // device + dispatchInterface, // dispatchInterface + pThreadGroupDimensions, // pThreadGroupDimensions + PreemptionMode::Disabled, // preemptionMode + 1, // partitionCount + false, // isIndirect + false, // isPredicate + false, // isTimestampEvent + false, // L3FlushEnable + requiresUncachedMocs, // requiresUncachedMocs + false, // useGlobalAtomics + false, // isInternal + false // isCooperative }; return args; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index c882d97748..470ddc02a1 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1206,3 +1206,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EXPECT_EQ(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_DISABLED, internalWalkerCmd->getPartitionType()); EXPECT_EQ(16u, internalWalkerCmd->getThreadGroupIdXDimension()); } + +HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhenTimestampPostSyncRequiredThenTimestampPostSyncIsAdded) { + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + uint32_t dims[] = {2, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + uint64_t eventAddress = MemoryConstants::cacheLineSize * 123; + + bool requiresUncachedMocs = false; + EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); + dispatchArgs.eventAddress = eventAddress; + dispatchArgs.isTimestampEvent = true; + + EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); + + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + auto itor = find(commands.begin(), commands.end()); + ASSERT_NE(itor, commands.end()); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, cmd->getPostSync().getOperation()); +}