mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 21:42:53 +08:00
Use only first event packet when PC
If PIPE_CONTROL is used to collect timestamps, only the first event packet is used and overwritten.
Signed-off-by: Daria Hinz <daria.hinz@intel.com>
Related-To: NEO-5611
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
eb34c42e02
commit
bd35d2b3bd
@@ -1555,10 +1555,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
if (beforeWalker) {
|
||||
event->increasePacketsInUse();
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1650,13 +1646,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
auto dstptrAllocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents);
|
||||
commandContainer.addToResidencyContainer(dstptrAllocationStruct.alloc);
|
||||
|
||||
std::unique_ptr<EventData[]> timestampsAddress = std::make_unique<EventData[]>(numEvents);
|
||||
std::unique_ptr<EventData[]> timestampsData = std::make_unique<EventData[]>(numEvents);
|
||||
|
||||
for (uint32_t i = 0u; i < numEvents; ++i) {
|
||||
auto event = Event::fromHandle(phEvents[i]);
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
timestampsAddress[i].address = event->getGpuAddress();
|
||||
timestampsAddress[i].packetsInUse = event->getPacketsInUse();
|
||||
timestampsData[i].address = event->getGpuAddress();
|
||||
timestampsData[i].packetsInUse = event->getPacketsInUse();
|
||||
}
|
||||
|
||||
size_t alignedSize = alignUp<size_t>(sizeof(EventData) * numEvents, MemoryConstants::pageSize64k);
|
||||
@@ -1670,14 +1666,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
false,
|
||||
devices};
|
||||
|
||||
NEO::GraphicsAllocation *timestampsGPUAddress = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
NEO::GraphicsAllocation *timestampsGPUData = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
|
||||
UNRECOVERABLE_IF(timestampsGPUAddress == nullptr);
|
||||
UNRECOVERABLE_IF(timestampsGPUData == nullptr);
|
||||
|
||||
commandContainer.addToResidencyContainer(timestampsGPUAddress);
|
||||
commandContainer.getDeallocationContainer().push_back(timestampsGPUAddress);
|
||||
commandContainer.addToResidencyContainer(timestampsGPUData);
|
||||
commandContainer.getDeallocationContainer().push_back(timestampsGPUData);
|
||||
|
||||
bool result = device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(timestampsGPUAddress, 0, timestampsAddress.get(), sizeof(EventData) * numEvents);
|
||||
bool result = device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(timestampsGPUData, 0, timestampsData.get(), sizeof(EventData) * numEvents);
|
||||
|
||||
UNRECOVERABLE_IF(!result);
|
||||
|
||||
@@ -1717,7 +1713,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
|
||||
auto dstValPtr = static_cast<uintptr_t>(dstptrAllocationStruct.alloc->getGpuAddress());
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUAddress->getGpuAddress()), timestampsGPUAddress);
|
||||
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
|
||||
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstptrAllocationStruct.alloc);
|
||||
|
||||
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
|
||||
|
||||
@@ -54,7 +54,7 @@ struct Event : _ze_event_handle_t {
|
||||
|
||||
void *hostAddress = nullptr;
|
||||
uint64_t gpuAddress;
|
||||
uint32_t packetsInUse;
|
||||
uint32_t packetsInUse = 0u;
|
||||
|
||||
ze_event_scope_flags_t signalScope = 0u;
|
||||
ze_event_scope_flags_t waitScope = 0u;
|
||||
|
||||
@@ -1043,11 +1043,10 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCo
|
||||
auto baseAddr = event->getGpuAddress();
|
||||
auto contextOffset = offsetof(TimestampPacketStorage::Packet, contextStart);
|
||||
auto globalOffset = offsetof(TimestampPacketStorage::Packet, globalStart);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
EXPECT_EQ(event->getTimestampPacketAddress(), baseAddr);
|
||||
|
||||
commandList->appendEventForProfilingCopyCommand(event->toHandle(), true);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 1u);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
|
||||
@@ -1090,10 +1090,9 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopy
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
|
||||
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
||||
EXPECT_GT(commandList.appendMemoryCopyBlitCalledTimes, 1u);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 1u);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -1177,12 +1176,11 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
|
||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
||||
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalledTimes, 0u);
|
||||
EXPECT_EQ(commandList.appendMemoryCopyBlitCalledTimes, 0u);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 1u);
|
||||
EXPECT_EQ(event->getPacketsInUse(), 0u);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
|
||||
@@ -137,6 +137,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
|
||||
|
||||
commandList->appendEventReset(event->toHandle());
|
||||
ASSERT_EQ(0u, event->getPacketsInUse());
|
||||
auto contextOffset = offsetof(TimestampPacketStorage::Packet, contextEnd);
|
||||
auto baseAddr = event->getGpuAddress();
|
||||
auto gpuAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
@@ -489,6 +489,33 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
||||
// Appends the same kernel, signalling the same timestamp event, more times than
// NEO::TimestampPacketSizeControl::preferredPacketCount and checks that the event's
// packets-in-use counter is still 0 afterwards.
// Runs only on products matched by SupportedPlatforms.
HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelSeveralTimesThenAlwaysFirstEventPacketIsUsed, SupportedPlatforms) {
|
||||
createKernel();
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
// Pool holding a single event, created with the kernel-timestamp and host-visible flags.
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
|
||||
// Positional initializer; NOTE(review): presumably {stype, pNext, index, signal, wait} -
// confirm against the ze_event_desc_t declaration.
const ze_event_desc_t eventDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
ZE_EVENT_SCOPE_FLAG_HOST,
|
||||
ZE_EVENT_SCOPE_FLAG_HOST};
|
||||
|
||||
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
|
||||
auto event = std::unique_ptr<Event>(Event::create(eventPool.get(), &eventDesc, device));
|
||||
// A freshly created event reports no packets in use.
EXPECT_EQ(0u, event->getPacketsInUse());
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
// Iterate past preferredPacketCount (by 4) so that any per-append packet increment would be visible.
for (uint32_t i = 0; i < NEO::TimestampPacketSizeControl::preferredPacketCount + 4; i++) {
|
||||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
// The counter must be unchanged after all appends.
EXPECT_EQ(0u, event->getPacketsInUse());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWorkGroupCountAndGlobalWorkSizeIsSetInCrossThreadData) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
|
||||
@@ -226,6 +226,18 @@ TEST_F(TimestampEventCreate, givenTimestampEventThenAllocationsIsOfPacketTagBuff
|
||||
EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, allocation->getAllocationType());
|
||||
}
|
||||
|
||||
// Checks that getTimestampPacketAddress() equals the event's GPU address while no packets
// are in use, and moves forward by sizeof(TimestampPacketStorage::Packet) after one call
// to increasePacketsInUse().
TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsIncreasedThenCorrectOffsetIsReturned) {
|
||||
EXPECT_EQ(0u, event->getPacketsInUse());
|
||||
auto gpuAddr = event->getGpuAddress();
|
||||
// With zero packets in use the packet address is the base GPU address.
EXPECT_EQ(gpuAddr, event->getTimestampPacketAddress());
|
||||
|
||||
event->increasePacketsInUse();
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
|
||||
// Expected address after one increment: base plus the size of one packet.
gpuAddr += sizeof(TimestampPacketStorage::Packet);
|
||||
EXPECT_EQ(gpuAddr, event->getTimestampPacketAddress());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
|
||||
TimestampPacketStorage::Packet data = {};
|
||||
data.contextStart = 1u;
|
||||
@@ -233,10 +245,7 @@ HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryK
|
||||
data.globalStart = 3u;
|
||||
data.globalEnd = 4u;
|
||||
|
||||
event->increasePacketsInUse();
|
||||
EXPECT_EQ(event->getPacketsInUse(), 1u);
|
||||
event->hostAddress = &data;
|
||||
|
||||
ze_kernel_timestamp_result_t result = {};
|
||||
|
||||
event->queryKernelTimestamp(&result);
|
||||
|
||||
Reference in New Issue
Block a user