Refactor TimestampPacket class

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2021-03-22 15:14:49 +00:00 committed by Compute-Runtime-Automation
parent 689028992a
commit 3dc3ad36f8
11 changed files with 159 additions and 128 deletions

View File

@ -147,24 +147,23 @@ uint64_t Event::getTimestampPacketAddress() {
}
ze_result_t EventImp::calculateProfilingData() {
globalStartTS = timestampsData->packets[0].globalStart;
globalEndTS = timestampsData->packets[0].globalEnd;
contextStartTS = timestampsData->packets[0].contextStart;
contextEndTS = timestampsData->packets[0].contextEnd;
globalStartTS = timestampsData->getGlobalStartValue(0);
globalEndTS = timestampsData->getGlobalEndValue(0);
contextStartTS = timestampsData->getContextStartValue(0);
contextEndTS = timestampsData->getContextEndValue(0);
for (auto i = 1u; i < packetsInUse; i++) {
auto &packet = timestampsData->packets[i];
if (globalStartTS > packet.globalStart) {
globalStartTS = packet.globalStart;
if (globalStartTS > timestampsData->getGlobalStartValue(i)) {
globalStartTS = timestampsData->getGlobalStartValue(i);
}
if (contextStartTS > packet.contextStart) {
contextStartTS = packet.contextStart;
if (contextStartTS > timestampsData->getContextStartValue(i)) {
contextStartTS = timestampsData->getContextStartValue(i);
}
if (contextEndTS < packet.contextEnd) {
contextEndTS = packet.contextEnd;
if (contextEndTS < timestampsData->getContextEndValue(i)) {
contextEndTS = timestampsData->getContextEndValue(i);
}
if (globalEndTS < packet.globalEnd) {
globalEndTS = packet.globalEnd;
if (globalEndTS < timestampsData->getGlobalEndValue(i)) {
globalEndTS = timestampsData->getGlobalEndValue(i);
}
}
@ -172,20 +171,11 @@ ze_result_t EventImp::calculateProfilingData() {
}
void EventImp::assignTimestampData(void *address) {
auto baseAddr = reinterpret_cast<uint64_t>(address);
uint32_t packetsToCopy = packetsInUse ? packetsInUse : NEO::TimestampPacketSizeControl::preferredPacketCount;
auto copyData = [&](uint32_t &timestampField, auto tsAddr) {
memcpy_s(static_cast<void *>(&timestampField), sizeof(uint32_t), reinterpret_cast<void *>(tsAddr), sizeof(uint32_t));
};
for (uint32_t i = 0; i < packetsToCopy; i++) {
auto &packet = timestampsData->packets[i];
copyData(packet.globalStart, baseAddr + offsetof(TimestampPacketStorage::Packet, globalStart));
copyData(packet.contextStart, baseAddr + offsetof(TimestampPacketStorage::Packet, contextStart));
copyData(packet.globalEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, globalEnd));
copyData(packet.contextEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, contextEnd));
baseAddr += sizeof(struct TimestampPacketStorage::Packet);
timestampsData->assignDataToAllTimestamps(i, address);
address = ptrOffset(address, sizeof(struct TimestampPacketStorage::Packet));
}
}
@ -215,7 +205,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
auto signalScopeFlag = this->signalScope;
auto eventTsSetFunc = [&](auto tsAddr) {
auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) {
auto tsptr = reinterpret_cast<void *>(tsAddr);
memcpy_s(tsptr, sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));

View File

@ -276,11 +276,10 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor
EXPECT_NE(nullptr, event->timestampsData);
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
auto &packet = event->timestampsData->packets[i];
EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextStart);
EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalStart);
EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextEnd);
EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalEnd);
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getGlobalStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getContextEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getGlobalEndValue(i));
}
EXPECT_EQ(0u, event->getPacketsInUse());

View File

@ -258,13 +258,12 @@ bool Event::calcProfilingData() {
for (auto i = 0u; i < timestamps.size(); i++) {
std::cout << "Timestamp " << i << ", "
<< "profiling capable: " << timestamps[i]->isProfilingCapable() << ", ";
for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->packetsUsed; j++) {
const auto &packet = timestamps[i]->tagForCpuAccess->packets[j];
for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->getPacketsUsed(); j++) {
std::cout << "packet " << j << ": "
<< "global start: " << packet.globalStart << ", "
<< "global end: " << packet.globalEnd << ", "
<< "context start: " << packet.contextStart << ", "
<< "context end: " << packet.contextEnd << std::endl;
<< "global start: " << timestamps[i]->tagForCpuAccess->getGlobalStartValue(j) << ", "
<< "global end: " << timestamps[i]->tagForCpuAccess->getGlobalEndValue(j) << ", "
<< "context start: " << timestamps[i]->tagForCpuAccess->getContextStartValue(j) << ", "
<< "context end: " << timestamps[i]->tagForCpuAccess->getContextEndValue(j) << std::endl;
}
}
}
@ -347,20 +346,19 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
void Event::getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS) {
const auto timestamps = timestampContainer->peekNodes();
globalStartTS = timestamps[0]->tagForCpuAccess->packets[0].globalStart;
globalEndTS = timestamps[0]->tagForCpuAccess->packets[0].globalEnd;
globalStartTS = timestamps[0]->tagForCpuAccess->getGlobalStartValue(0);
globalEndTS = timestamps[0]->tagForCpuAccess->getGlobalEndValue(0);
for (const auto &timestamp : timestamps) {
if (!timestamp->isProfilingCapable()) {
continue;
}
for (auto i = 0u; i < timestamp->tagForCpuAccess->packetsUsed; ++i) {
const auto &packet = timestamp->tagForCpuAccess->packets[i];
if (globalStartTS > packet.globalStart) {
globalStartTS = packet.globalStart;
for (auto i = 0u; i < timestamp->tagForCpuAccess->getPacketsUsed(); ++i) {
if (globalStartTS > timestamp->tagForCpuAccess->getGlobalStartValue(i)) {
globalStartTS = timestamp->tagForCpuAccess->getGlobalStartValue(i);
}
if (globalEndTS < packet.globalEnd) {
globalEndTS = packet.globalEnd;
if (globalEndTS < timestamp->tagForCpuAccess->getGlobalEndValue(i)) {
globalEndTS = timestamp->tagForCpuAccess->getGlobalEndValue(i);
}
}
}

View File

@ -290,12 +290,13 @@ struct CommandStreamReceiverTagTests : public ::testing::Test {
auto allocator = csr.getTimestampPacketAllocator();
auto tag = allocator->getTag();
for (auto &packet : tag->tagForCpuAccess->packets) {
packet.contextStart = 0;
packet.globalStart = 0;
packet.contextEnd = 0;
packet.globalEnd = 0;
uint32_t zeros[4] = {};
for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) {
tag->tagForCpuAccess->assignDataToAllTimestamps(i, zeros);
}
EXPECT_TRUE(tag->tagForCpuAccess->isCompleted());
bool canBeReleased = tag->canBeReleased();

View File

@ -36,17 +36,23 @@
using namespace NEO;
struct TimestampPacketSimpleTests : public ::testing::Test {
void setTagToReadyState(TagNode<TimestampPacketStorage> *tagNode) {
auto packetsUsed = tagNode->tagForCpuAccess->packetsUsed;
// Test helper: re-exports packets and implicitGpuDependenciesCount from
// TimestampPacketStorage as public members so tests can touch them directly.
class MockTimestampPacketStorage : public TimestampPacketStorage {
  public:
    using TimestampPacketStorage::packets;
    using TimestampPacketStorage::implicitGpuDependenciesCount;
};
template <typename TimestampPacketStorageT>
void setTagToReadyState(TagNode<TimestampPacketStorageT> *tagNode) {
auto packetsUsed = tagNode->tagForCpuAccess->getPacketsUsed();
tagNode->initialize();
for (auto &packet : tagNode->tagForCpuAccess->packets) {
packet.contextStart = 0u;
packet.globalStart = 0u;
packet.contextEnd = 0u;
packet.globalEnd = 0u;
uint32_t zeros[4] = {};
for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) {
tagNode->tagForCpuAccess->assignDataToAllTimestamps(i, zeros);
}
tagNode->tagForCpuAccess->packetsUsed = packetsUsed;
tagNode->tagForCpuAccess->setPacketsUsed(packetsUsed);
}
const size_t gws[3] = {1, 1, 1};
@ -139,7 +145,7 @@ HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenComman
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
TimestampPacketStorage tag;
MockTimestampPacketStorage tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@ -178,7 +184,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
TimestampPacketStorage tag;
tag.packetsUsed = 2;
tag.setPacketsUsed(2);
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@ -189,7 +195,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
for (uint32_t packetId = 0; packetId < tag.packetsUsed; packetId++) {
for (uint32_t packetId = 0; packetId < tag.getPacketsUsed(); packetId++) {
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, packetId);
}
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
@ -201,15 +207,15 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
auto expectedEndAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t));
EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode));
auto expectedCounterAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount);
auto expectedCounterAddress = mockNode.getGpuAddress() + (TimestampPacketSizeControl::preferredPacketCount * 4 * sizeof(uint32_t));
EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode));
}
TEST_F(TimestampPacketSimpleTests, whenEndTagIsNotOneThenMarkAsCompleted) {
TimestampPacketStorage timestampPacketStorage;
MockTimestampPacketStorage timestampPacketStorage;
auto &packet = timestampPacketStorage.packets[0];
timestampPacketStorage.initialize();
@ -266,7 +272,7 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov
}
TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) {
TimestampPacketStorage timestampPacketStorage;
MockTimestampPacketStorage timestampPacketStorage;
auto &packet = timestampPacketStorage.packets[0];
timestampPacketStorage.initialize();
@ -278,29 +284,29 @@ TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTim
}
TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) {
TimestampPacketStorage timestampPacketStorage;
MockTimestampPacketStorage timestampPacketStorage;
auto &packets = timestampPacketStorage.packets;
timestampPacketStorage.initialize();
timestampPacketStorage.packetsUsed = TimestampPacketSizeControl::preferredPacketCount - 1;
timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1);
for (uint32_t i = 0; i < timestampPacketStorage.packetsUsed - 1; i++) {
for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) {
packets[i].contextEnd = 0;
packets[i].globalEnd = 0;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
}
packets[timestampPacketStorage.packetsUsed - 1].contextEnd = 0;
packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
packets[timestampPacketStorage.packetsUsed - 1].globalEnd = 0;
packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
}
TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
MockMemoryManager memoryManager(executionEnvironment);
MockTagAllocator<TimestampPacketStorage> allocator(0, &memoryManager, 1);
MockTagAllocator<MockTimestampPacketStorage> allocator(0, &memoryManager, 1);
auto firstNode = allocator.getTag();
auto i = 0u;
@ -328,11 +334,11 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
EXPECT_EQ(1u, packet.contextEnd);
EXPECT_EQ(1u, packet.globalEnd);
}
EXPECT_EQ(1u, firstNode->tagForCpuAccess->packetsUsed);
EXPECT_EQ(1u, firstNode->tagForCpuAccess->getPacketsUsed());
}
TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) {
TimestampPacketStorage timestampPacketStorage;
MockTimestampPacketStorage timestampPacketStorage;
EXPECT_EQ(TimestampPacketSizeControl::preferredPacketCount * sizeof(timestampPacketStorage.packets[0]), sizeof(timestampPacketStorage.packets));
for (const auto &packet : timestampPacketStorage.packets) {
@ -341,7 +347,7 @@ TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) {
EXPECT_EQ(1u, packet.contextEnd);
EXPECT_EQ(1u, packet.globalEnd);
}
EXPECT_EQ(1u, timestampPacketStorage.packetsUsed);
EXPECT_EQ(1u, timestampPacketStorage.getPacketsUsed());
}
HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferredTagPoolSizeThenReturnCorrectValue) {
@ -361,12 +367,8 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc
EXPECT_EQ(1u, csr.getPreferredTagPoolSize());
auto tag = csr.getTimestampPacketAllocator()->getTag();
for (auto &packet : tag->tagForCpuAccess->packets) {
packet.contextStart = 0;
packet.globalStart = 0;
packet.contextEnd = 0;
packet.globalEnd = 0;
}
setTagToReadyState(tag);
EXPECT_TRUE(tag->tagForCpuAccess->isCompleted());
EXPECT_FALSE(tag->canBeReleased());
}
@ -556,13 +558,13 @@ HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorr
size_t sizeForNodeDependency = 0;
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(mockNode);
size_t expectedSize = mockNode.tagForCpuAccess->packetsUsed * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
EXPECT_EQ(expectedSize, sizeForNodeDependency);
}
HWTEST_F(TimestampPacketTests, givenTagNodeWhenCpuAndGpuDependenciesCountAreEqualThenCanBeReleased) {
TimestampPacketStorage tag;
MockTimestampPacketStorage tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;

View File

@ -2183,8 +2183,12 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
container.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u;
container.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u;
uint32_t data[4] = {static_cast<uint32_t>(container.getNode(0u)->tagForCpuAccess->getContextStartValue(0)),
static_cast<uint32_t>(container.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0)),
2, 2};
container.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
@ -2192,8 +2196,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u;
subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u;
data[0] = static_cast<uint32_t>(subdeviceContainer.getNode(0u)->tagForCpuAccess->getContextStartValue(0));
data[1] = static_cast<uint32_t>(subdeviceContainer.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0));
data[2] = 2;
data[3] = 2;
subdeviceContainer.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
@ -2203,8 +2212,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->globalEnd = 2u;
subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->contextEnd = 2u;
data[0] = static_cast<uint32_t>(subdeviceContainer.getNode(1u)->tagForCpuAccess->getContextStartValue(0));
data[1] = static_cast<uint32_t>(subdeviceContainer.getNode(1u)->tagForCpuAccess->getGlobalStartValue(0));
data[2] = 2;
data[3] = 2;
subdeviceContainer.getNode(1u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);

View File

@ -1078,36 +1078,33 @@ struct ProfilingTimestampPacketsTest : public ::testing::Test {
ev->timestampPacketContainer = std::make_unique<MockTimestampContainer>();
}
void addTimestampNode(int contextStart, int contextEnd, int globalStart, int globalEnd) {
void addTimestampNode(uint32_t contextStart, uint32_t contextEnd, uint32_t globalStart, uint32_t globalEnd) {
auto node = new MockTagNode<TimestampPacketStorage>();
auto timestampPacketStorage = new TimestampPacketStorage();
node->tagForCpuAccess = timestampPacketStorage;
timestampPacketStorage->packets[0].contextStart = contextStart;
timestampPacketStorage->packets[0].contextEnd = contextEnd;
timestampPacketStorage->packets[0].globalStart = globalStart;
timestampPacketStorage->packets[0].globalEnd = globalEnd;
uint32_t values[4] = {contextStart, globalStart, contextEnd, globalEnd};
timestampPacketStorage->assignDataToAllTimestamps(0, values);
ev->timestampPacketContainer->add(node);
}
void addTimestampNodeMultiOsContext(int globalStart[16], int globalEnd[16], int contextStart[16], int contextEnd[16], uint32_t size) {
void addTimestampNodeMultiOsContext(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t contextStart[16], uint32_t contextEnd[16], uint32_t size) {
auto node = new MockTagNode<TimestampPacketStorage>();
auto timestampPacketStorage = new TimestampPacketStorage();
timestampPacketStorage->packetsUsed = size;
timestampPacketStorage->setPacketsUsed(size);
for (uint32_t i = 0u; i < timestampPacketStorage->packetsUsed; ++i) {
timestampPacketStorage->packets[i].globalStart = globalStart[i];
timestampPacketStorage->packets[i].globalEnd = globalEnd[i];
timestampPacketStorage->packets[i].contextStart = contextStart[i];
timestampPacketStorage->packets[i].contextEnd = contextEnd[i];
for (uint32_t i = 0u; i < timestampPacketStorage->getPacketsUsed(); ++i) {
uint32_t values[4] = {contextStart[i], globalStart[i], contextEnd[i], globalEnd[i]};
timestampPacketStorage->assignDataToAllTimestamps(i, values);
}
node->tagForCpuAccess = timestampPacketStorage;
ev->timestampPacketContainer->add(node);
}
void initTimestampNodeMultiOsContextData(int globalStart[16], int globalEnd[16], uint32_t size) {
void initTimestampNodeMultiOsContextData(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t size) {
for (uint32_t i = 0u; i < size; ++i) {
globalStart[i] = 100;
@ -1149,10 +1146,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneEleme
}
TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCalcProfilingDataIsCalledThenCorrectedValuesAreReturned) {
int globalStart[16] = {0};
int globalEnd[16] = {0};
int contextStart[16] = {0};
int contextEnd[16] = {0};
uint32_t globalStart[16] = {0};
uint32_t globalEnd[16] = {0};
uint32_t contextStart[16] = {0};
uint32_t contextEnd[16] = {0};
initTimestampNodeMultiOsContextData(globalStart, globalEnd, 16u);
addTimestampNodeMultiOsContext(globalStart, globalEnd, contextStart, contextEnd, 16u);
auto &device = reinterpret_cast<MockDevice &>(cmdQ->getDevice());
@ -1165,15 +1162,15 @@ TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCal
}
TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) {
int globalStart0 = 20;
int globalEnd0 = 51;
int contextStart0 = 21;
int contextEnd0 = 50;
uint32_t globalStart0 = 20;
uint32_t globalEnd0 = 51;
uint32_t contextStart0 = 21;
uint32_t contextEnd0 = 50;
int globalStart1 = globalStart0 - 1;
int globalEnd1 = globalEnd0 + 1;
int contextStart1 = contextStart0 - 1;
int contextEnd1 = contextEnd0 + 1;
uint32_t globalStart1 = globalStart0 - 1;
uint32_t globalEnd1 = globalEnd0 + 1;
uint32_t contextStart1 = contextStart0 - 1;
uint32_t contextEnd1 = contextEnd0 + 1;
addTimestampNodeMultiOsContext(&globalStart0, &globalEnd0, &contextStart0, &contextEnd0, 1);
addTimestampNodeMultiOsContext(&globalStart1, &globalEnd1, &contextStart1, &contextEnd1, 1);
@ -1197,10 +1194,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCa
auto &csr = device.getUltCommandStreamReceiver<DEFAULT_TEST_FAMILY_NAME>();
csr.multiOsContextCapable = true;
int globalStart[16] = {0};
int globalEnd[16] = {0};
int contextStart[16] = {0};
int contextEnd[16] = {0};
uint32_t globalStart[16] = {0};
uint32_t globalEnd[16] = {0};
uint32_t contextStart[16] = {0};
uint32_t contextEnd[16] = {0};
for (int i = 0; i < 16; i++) {
globalStart[i] = 2 * i;
globalEnd[i] = 500 * i;

View File

@ -47,7 +47,7 @@ struct HwPerfCounter;
struct HwTimeStamps;
template <typename TSize>
struct TimestampPackets;
class TimestampPackets;
template <typename T1>
class TagAllocator;

View File

@ -1006,8 +1006,8 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
if (blitProperties.outputTimestampPacket && profilingEnabled) {
auto timestampContextStartGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextStart);
auto timestampGlobalStartAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalStart);
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress);
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress);
@ -1017,8 +1017,8 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
if (blitProperties.outputTimestampPacket) {
if (profilingEnabled) {
auto timestampContextEndGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto timestampGlobalEndAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalEnd);
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket);
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, 0llu, newTaskCount, false, false);

View File

@ -29,7 +29,7 @@ template <typename TagType>
struct TagNode;
template <typename TSize>
struct TimestampPackets;
class TimestampPackets;
struct BlitProperties;
struct HardwareInfo;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -12,6 +12,7 @@
#include "shared/source/helpers/aux_translation.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/string.h"
#include "shared/source/utilities/tag_allocator.h"
#include "pipe_control_args.h"
@ -30,7 +31,8 @@ constexpr uint32_t preferredPacketCount = 16u;
#pragma pack(1)
template <typename TSize>
struct TimestampPackets {
class TimestampPackets {
public:
struct Packet {
TSize contextStart = 1u;
TSize globalStart = 1u;
@ -67,8 +69,27 @@ struct TimestampPackets {
implicitGpuDependenciesCount = 0;
}
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) {
memcpy_s(&packets[packetIndex], sizeof(Packet), source, sizeof(Packet));
}
// Offset of packets[0].globalStart relative to this object, as computed by ptrDiff.
size_t getGlobalStartOffset() const {
    const auto *field = &packets[0].globalStart;
    return ptrDiff(field, this);
}
// Offset of packets[0].contextStart relative to this object, as computed by ptrDiff.
size_t getContextStartOffset() const {
    const auto *field = &packets[0].contextStart;
    return ptrDiff(field, this);
}
// Offset of packets[0].contextEnd relative to this object, as computed by ptrDiff.
size_t getContextEndOffset() const {
    const auto *field = &packets[0].contextEnd;
    return ptrDiff(field, this);
}
// Offset of packets[0].globalEnd relative to this object, as computed by ptrDiff.
size_t getGlobalEndOffset() const {
    const auto *field = &packets[0].globalEnd;
    return ptrDiff(field, this);
}
// Offset of the implicitGpuDependenciesCount member relative to this object, as computed by ptrDiff.
size_t getImplicitGpuDependenciesCountOffset() const {
    const auto *field = &implicitGpuDependenciesCount;
    return ptrDiff(field, this);
}
uint64_t getContextStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].contextStart); }
uint64_t getGlobalStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].globalStart); }
uint64_t getContextEndValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].contextEnd); }
uint64_t getGlobalEndValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].globalEnd); }
// Sets the packetsUsed member to the given value.
void setPacketsUsed(uint32_t used) {
    packetsUsed = used;
}
// Returns the current value of the packetsUsed member.
uint32_t getPacketsUsed() const {
    return packetsUsed;
}
// Returns the current value of the implicitGpuDependenciesCount member.
uint32_t getImplicitGpuDependenciesCount() const {
    return implicitGpuDependenciesCount;
}
protected:
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
uint32_t implicitGpuDependenciesCount = 0;
uint32_t packetsUsed = 1;
@ -110,11 +131,20 @@ struct TimestampPacketDependencies : public NonCopyableClass {
struct TimestampPacketHelper {
static uint64_t getContextEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextEndOffset();
}
// GPU address of the contextStart field: the node's GPU address plus the
// offset reported by the storage's getContextStartOffset().
static uint64_t getContextStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
    const auto fieldOffset = timestampPacketNode.tagForCpuAccess->getContextStartOffset();
    return timestampPacketNode.getGpuAddress() + fieldOffset;
}
// GPU address of the globalEnd field: the node's GPU address plus the
// offset reported by the storage's getGlobalEndOffset().
static uint64_t getGlobalEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
    const auto fieldOffset = timestampPacketNode.tagForCpuAccess->getGlobalEndOffset();
    return timestampPacketNode.getGpuAddress() + fieldOffset;
}
// GPU address of the globalStart field: the node's GPU address plus the
// offset reported by the storage's getGlobalStartOffset().
static uint64_t getGlobalStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
    const auto fieldOffset = timestampPacketNode.tagForCpuAccess->getGlobalStartOffset();
    return timestampPacketNode.getGpuAddress() + fieldOffset;
}
static uint64_t getGpuDependenciesCountGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount);
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
}
static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices);
@ -128,7 +158,7 @@ struct TimestampPacketHelper {
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode);
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) {
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->getPacketsUsed(); packetId++) {
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
}
@ -202,7 +232,7 @@ struct TimestampPacketHelper {
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
}