Use only first event packet when PIPE_CONTROL is used

If PIPE_CONTROL is used to collect timestamps, only the first event packet is
used, and it is overwritten by each subsequent write.

Signed-off-by: Daria Hinz <daria.hinz@intel.com>
Related-To: NEO-5611
This commit is contained in:
Daria Hinz
2021-03-05 12:45:32 +01:00
committed by Compute-Runtime-Automation
parent eb34c42e02
commit bd35d2b3bd
7 changed files with 53 additions and 23 deletions

View File

@@ -1555,10 +1555,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
}
if (beforeWalker) {
event->increasePacketsInUse();
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1650,13 +1646,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
auto dstptrAllocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents);
commandContainer.addToResidencyContainer(dstptrAllocationStruct.alloc);
std::unique_ptr<EventData[]> timestampsAddress = std::make_unique<EventData[]>(numEvents);
std::unique_ptr<EventData[]> timestampsData = std::make_unique<EventData[]>(numEvents);
for (uint32_t i = 0u; i < numEvents; ++i) {
auto event = Event::fromHandle(phEvents[i]);
commandContainer.addToResidencyContainer(&event->getAllocation());
timestampsAddress[i].address = event->getGpuAddress();
timestampsAddress[i].packetsInUse = event->getPacketsInUse();
timestampsData[i].address = event->getGpuAddress();
timestampsData[i].packetsInUse = event->getPacketsInUse();
}
size_t alignedSize = alignUp<size_t>(sizeof(EventData) * numEvents, MemoryConstants::pageSize64k);
@@ -1670,14 +1666,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
false,
devices};
NEO::GraphicsAllocation *timestampsGPUAddress = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
NEO::GraphicsAllocation *timestampsGPUData = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(timestampsGPUAddress == nullptr);
UNRECOVERABLE_IF(timestampsGPUData == nullptr);
commandContainer.addToResidencyContainer(timestampsGPUAddress);
commandContainer.getDeallocationContainer().push_back(timestampsGPUAddress);
commandContainer.addToResidencyContainer(timestampsGPUData);
commandContainer.getDeallocationContainer().push_back(timestampsGPUData);
bool result = device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(timestampsGPUAddress, 0, timestampsAddress.get(), sizeof(EventData) * numEvents);
bool result = device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(timestampsGPUData, 0, timestampsData.get(), sizeof(EventData) * numEvents);
UNRECOVERABLE_IF(!result);
@@ -1717,7 +1713,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
auto dstValPtr = static_cast<uintptr_t>(dstptrAllocationStruct.alloc->getGpuAddress());
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUAddress->getGpuAddress()), timestampsGPUAddress);
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstptrAllocationStruct.alloc);
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,

View File

@@ -54,7 +54,7 @@ struct Event : _ze_event_handle_t {
void *hostAddress = nullptr;
uint64_t gpuAddress;
uint32_t packetsInUse;
uint32_t packetsInUse = 0u;
ze_event_scope_flags_t signalScope = 0u;
ze_event_scope_flags_t waitScope = 0u;

View File

@@ -1043,11 +1043,10 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCo
auto baseAddr = event->getGpuAddress();
auto contextOffset = offsetof(TimestampPacketStorage::Packet, contextStart);
auto globalOffset = offsetof(TimestampPacketStorage::Packet, globalStart);
EXPECT_EQ(event->getPacketsInUse(), 0u);
EXPECT_EQ(event->getTimestampPacketAddress(), baseAddr);
commandList->appendEventForProfilingCopyCommand(event->toHandle(), true);
EXPECT_EQ(event->getPacketsInUse(), 1u);
EXPECT_EQ(event->getPacketsInUse(), 0u);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(

View File

@@ -1090,10 +1090,9 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopy
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
EXPECT_EQ(event->getPacketsInUse(), 0u);
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
EXPECT_GT(commandList.appendMemoryCopyBlitCalledTimes, 1u);
EXPECT_EQ(event->getPacketsInUse(), 1u);
EXPECT_EQ(event->getPacketsInUse(), 0u);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -1177,12 +1176,11 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
EXPECT_EQ(event->getPacketsInUse(), 0u);
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalledTimes, 0u);
EXPECT_EQ(commandList.appendMemoryCopyBlitCalledTimes, 0u);
EXPECT_EQ(event->getPacketsInUse(), 1u);
EXPECT_EQ(event->getPacketsInUse(), 0u);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(

View File

@@ -137,6 +137,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
commandList->appendEventReset(event->toHandle());
ASSERT_EQ(0u, event->getPacketsInUse());
auto contextOffset = offsetof(TimestampPacketStorage::Packet, contextEnd);
auto baseAddr = event->getGpuAddress();
auto gpuAddress = ptrOffset(baseAddr, contextOffset);

View File

@@ -489,6 +489,33 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
commandList->cmdQImmediate = nullptr;
}
// Product filter handed to HWTEST2_F below (IGFX_SKYLAKE .. IGFX_DG1; see
// IsWithinProducts for the exact bound semantics).
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;

// Launches the same kernel, signalling the same timestamp event, more times
// than TimestampPacketSizeControl::preferredPacketCount and verifies that the
// event's packets-in-use counter never moves away from 0.
HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelSeveralTimesThenAlwaysFirstEventPacketIsUsed, SupportedPlatforms) {
    createKernel();

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(
        L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));

    // Single-event pool created with the kernel-timestamp and host-visible flags.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    const ze_event_desc_t timestampEventDesc = {
        ZE_STRUCTURE_TYPE_EVENT_DESC,
        nullptr,
        0,
        ZE_EVENT_SCOPE_FLAG_HOST,
        ZE_EVENT_SCOPE_FLAG_HOST};

    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), 0, nullptr, &poolDesc));
    auto event = std::unique_ptr<Event>(Event::create(eventPool.get(), &timestampEventDesc, device));

    // A freshly created event starts with no packets in use.
    EXPECT_EQ(0u, event->getPacketsInUse());

    ze_group_count_t dispatchSize{1, 1, 1};
    // Deliberately exceed the preferred packet count by a few launches.
    const auto launchCount = NEO::TimestampPacketSizeControl::preferredPacketCount + 4;
    for (uint32_t launch = 0; launch < launchCount; ++launch) {
        EXPECT_EQ(ZE_RESULT_SUCCESS,
                  commandList->appendLaunchKernel(kernel->toHandle(), &dispatchSize, event->toHandle(), 0, nullptr));
    }

    // Repeated launches must not have advanced the packet counter.
    EXPECT_EQ(0u, event->getPacketsInUse());
}
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWorkGroupCountAndGlobalWorkSizeIsSetInCrossThreadData) {
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;

View File

@@ -226,6 +226,18 @@ TEST_F(TimestampEventCreate, givenTimestampEventThenAllocationsIsOfPacketTagBuff
EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, allocation->getAllocationType());
}
// Verifies that getTimestampPacketAddress() equals the event's GPU address
// while no packets are in use, and moves forward by exactly one
// TimestampPacketStorage::Packet after increasePacketsInUse() is called once.
TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsIncreasedThenCorrectOffsetIsReturned) {
    // Initial state: counter at zero, packet address equal to the base address.
    EXPECT_EQ(0u, event->getPacketsInUse());
    const auto baseAddress = event->getGpuAddress();
    EXPECT_EQ(baseAddress, event->getTimestampPacketAddress());

    event->increasePacketsInUse();
    EXPECT_EQ(1u, event->getPacketsInUse());

    // After one increment the address is expected to be one packet further on.
    const auto secondPacketAddress = baseAddress + sizeof(TimestampPacketStorage::Packet);
    EXPECT_EQ(secondPacketAddress, event->getTimestampPacketAddress());
}
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
TimestampPacketStorage::Packet data = {};
data.contextStart = 1u;
@@ -233,10 +245,7 @@ HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryK
data.globalStart = 3u;
data.globalEnd = 4u;
event->increasePacketsInUse();
EXPECT_EQ(event->getPacketsInUse(), 1u);
event->hostAddress = &data;
ze_kernel_timestamp_result_t result = {};
event->queryKernelTimestamp(&result);