Refactor TimestampPacket class
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
689028992a
commit
3dc3ad36f8
|
@ -147,24 +147,23 @@ uint64_t Event::getTimestampPacketAddress() {
|
|||
}
|
||||
|
||||
ze_result_t EventImp::calculateProfilingData() {
|
||||
globalStartTS = timestampsData->packets[0].globalStart;
|
||||
globalEndTS = timestampsData->packets[0].globalEnd;
|
||||
contextStartTS = timestampsData->packets[0].contextStart;
|
||||
contextEndTS = timestampsData->packets[0].contextEnd;
|
||||
globalStartTS = timestampsData->getGlobalStartValue(0);
|
||||
globalEndTS = timestampsData->getGlobalEndValue(0);
|
||||
contextStartTS = timestampsData->getContextStartValue(0);
|
||||
contextEndTS = timestampsData->getContextEndValue(0);
|
||||
|
||||
for (auto i = 1u; i < packetsInUse; i++) {
|
||||
auto &packet = timestampsData->packets[i];
|
||||
if (globalStartTS > packet.globalStart) {
|
||||
globalStartTS = packet.globalStart;
|
||||
if (globalStartTS > timestampsData->getGlobalStartValue(i)) {
|
||||
globalStartTS = timestampsData->getGlobalStartValue(i);
|
||||
}
|
||||
if (contextStartTS > packet.contextStart) {
|
||||
contextStartTS = packet.contextStart;
|
||||
if (contextStartTS > timestampsData->getContextStartValue(i)) {
|
||||
contextStartTS = timestampsData->getContextStartValue(i);
|
||||
}
|
||||
if (contextEndTS < packet.contextEnd) {
|
||||
contextEndTS = packet.contextEnd;
|
||||
if (contextEndTS < timestampsData->getContextEndValue(i)) {
|
||||
contextEndTS = timestampsData->getContextEndValue(i);
|
||||
}
|
||||
if (globalEndTS < packet.globalEnd) {
|
||||
globalEndTS = packet.globalEnd;
|
||||
if (globalEndTS < timestampsData->getGlobalEndValue(i)) {
|
||||
globalEndTS = timestampsData->getGlobalEndValue(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -172,20 +171,11 @@ ze_result_t EventImp::calculateProfilingData() {
|
|||
}
|
||||
|
||||
void EventImp::assignTimestampData(void *address) {
|
||||
auto baseAddr = reinterpret_cast<uint64_t>(address);
|
||||
uint32_t packetsToCopy = packetsInUse ? packetsInUse : NEO::TimestampPacketSizeControl::preferredPacketCount;
|
||||
|
||||
auto copyData = [&](uint32_t &timestampField, auto tsAddr) {
|
||||
memcpy_s(static_cast<void *>(&timestampField), sizeof(uint32_t), reinterpret_cast<void *>(tsAddr), sizeof(uint32_t));
|
||||
};
|
||||
|
||||
for (uint32_t i = 0; i < packetsToCopy; i++) {
|
||||
auto &packet = timestampsData->packets[i];
|
||||
copyData(packet.globalStart, baseAddr + offsetof(TimestampPacketStorage::Packet, globalStart));
|
||||
copyData(packet.contextStart, baseAddr + offsetof(TimestampPacketStorage::Packet, contextStart));
|
||||
copyData(packet.globalEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, globalEnd));
|
||||
copyData(packet.contextEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, contextEnd));
|
||||
baseAddr += sizeof(struct TimestampPacketStorage::Packet);
|
||||
timestampsData->assignDataToAllTimestamps(i, address);
|
||||
address = ptrOffset(address, sizeof(struct TimestampPacketStorage::Packet));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -215,7 +205,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
|
|||
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
|
||||
auto signalScopeFlag = this->signalScope;
|
||||
|
||||
auto eventTsSetFunc = [&](auto tsAddr) {
|
||||
auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) {
|
||||
auto tsptr = reinterpret_cast<void *>(tsAddr);
|
||||
|
||||
memcpy_s(tsptr, sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));
|
||||
|
|
|
@ -276,11 +276,10 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor
|
|||
EXPECT_NE(nullptr, event->timestampsData);
|
||||
|
||||
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
|
||||
auto &packet = event->timestampsData->packets[i];
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextStart);
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalStart);
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextEnd);
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalEnd);
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getContextStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getGlobalStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getContextEndValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->timestampsData->getGlobalEndValue(i));
|
||||
}
|
||||
|
||||
EXPECT_EQ(0u, event->getPacketsInUse());
|
||||
|
|
|
@ -258,13 +258,12 @@ bool Event::calcProfilingData() {
|
|||
for (auto i = 0u; i < timestamps.size(); i++) {
|
||||
std::cout << "Timestamp " << i << ", "
|
||||
<< "profiling capable: " << timestamps[i]->isProfilingCapable() << ", ";
|
||||
for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->packetsUsed; j++) {
|
||||
const auto &packet = timestamps[i]->tagForCpuAccess->packets[j];
|
||||
for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->getPacketsUsed(); j++) {
|
||||
std::cout << "packet " << j << ": "
|
||||
<< "global start: " << packet.globalStart << ", "
|
||||
<< "global end: " << packet.globalEnd << ", "
|
||||
<< "context start: " << packet.contextStart << ", "
|
||||
<< "context end: " << packet.contextEnd << std::endl;
|
||||
<< "global start: " << timestamps[i]->tagForCpuAccess->getGlobalStartValue(j) << ", "
|
||||
<< "global end: " << timestamps[i]->tagForCpuAccess->getGlobalEndValue(j) << ", "
|
||||
<< "context start: " << timestamps[i]->tagForCpuAccess->getContextStartValue(j) << ", "
|
||||
<< "context end: " << timestamps[i]->tagForCpuAccess->getContextEndValue(j) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -347,20 +346,19 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
|||
void Event::getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS) {
|
||||
const auto timestamps = timestampContainer->peekNodes();
|
||||
|
||||
globalStartTS = timestamps[0]->tagForCpuAccess->packets[0].globalStart;
|
||||
globalEndTS = timestamps[0]->tagForCpuAccess->packets[0].globalEnd;
|
||||
globalStartTS = timestamps[0]->tagForCpuAccess->getGlobalStartValue(0);
|
||||
globalEndTS = timestamps[0]->tagForCpuAccess->getGlobalEndValue(0);
|
||||
|
||||
for (const auto &timestamp : timestamps) {
|
||||
if (!timestamp->isProfilingCapable()) {
|
||||
continue;
|
||||
}
|
||||
for (auto i = 0u; i < timestamp->tagForCpuAccess->packetsUsed; ++i) {
|
||||
const auto &packet = timestamp->tagForCpuAccess->packets[i];
|
||||
if (globalStartTS > packet.globalStart) {
|
||||
globalStartTS = packet.globalStart;
|
||||
for (auto i = 0u; i < timestamp->tagForCpuAccess->getPacketsUsed(); ++i) {
|
||||
if (globalStartTS > timestamp->tagForCpuAccess->getGlobalStartValue(i)) {
|
||||
globalStartTS = timestamp->tagForCpuAccess->getGlobalStartValue(i);
|
||||
}
|
||||
if (globalEndTS < packet.globalEnd) {
|
||||
globalEndTS = packet.globalEnd;
|
||||
if (globalEndTS < timestamp->tagForCpuAccess->getGlobalEndValue(i)) {
|
||||
globalEndTS = timestamp->tagForCpuAccess->getGlobalEndValue(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -290,12 +290,13 @@ struct CommandStreamReceiverTagTests : public ::testing::Test {
|
|||
|
||||
auto allocator = csr.getTimestampPacketAllocator();
|
||||
auto tag = allocator->getTag();
|
||||
for (auto &packet : tag->tagForCpuAccess->packets) {
|
||||
packet.contextStart = 0;
|
||||
packet.globalStart = 0;
|
||||
packet.contextEnd = 0;
|
||||
packet.globalEnd = 0;
|
||||
|
||||
uint32_t zeros[4] = {};
|
||||
|
||||
for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) {
|
||||
tag->tagForCpuAccess->assignDataToAllTimestamps(i, zeros);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(tag->tagForCpuAccess->isCompleted());
|
||||
|
||||
bool canBeReleased = tag->canBeReleased();
|
||||
|
|
|
@ -36,17 +36,23 @@
|
|||
using namespace NEO;
|
||||
|
||||
struct TimestampPacketSimpleTests : public ::testing::Test {
|
||||
void setTagToReadyState(TagNode<TimestampPacketStorage> *tagNode) {
|
||||
auto packetsUsed = tagNode->tagForCpuAccess->packetsUsed;
|
||||
class MockTimestampPacketStorage : public TimestampPacketStorage {
|
||||
public:
|
||||
using TimestampPacketStorage::implicitGpuDependenciesCount;
|
||||
using TimestampPacketStorage::packets;
|
||||
};
|
||||
|
||||
template <typename TimestampPacketStorageT>
|
||||
void setTagToReadyState(TagNode<TimestampPacketStorageT> *tagNode) {
|
||||
auto packetsUsed = tagNode->tagForCpuAccess->getPacketsUsed();
|
||||
tagNode->initialize();
|
||||
|
||||
for (auto &packet : tagNode->tagForCpuAccess->packets) {
|
||||
packet.contextStart = 0u;
|
||||
packet.globalStart = 0u;
|
||||
packet.contextEnd = 0u;
|
||||
packet.globalEnd = 0u;
|
||||
uint32_t zeros[4] = {};
|
||||
|
||||
for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) {
|
||||
tagNode->tagForCpuAccess->assignDataToAllTimestamps(i, zeros);
|
||||
}
|
||||
tagNode->tagForCpuAccess->packetsUsed = packetsUsed;
|
||||
tagNode->tagForCpuAccess->setPacketsUsed(packetsUsed);
|
||||
}
|
||||
|
||||
const size_t gws[3] = {1, 1, 1};
|
||||
|
@ -139,7 +145,7 @@ HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenComman
|
|||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
TimestampPacketStorage tag;
|
||||
MockTimestampPacketStorage tag;
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
|
@ -178,7 +184,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
|
|||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
TimestampPacketStorage tag;
|
||||
tag.packetsUsed = 2;
|
||||
tag.setPacketsUsed(2);
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
|
@ -189,7 +195,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
|
|||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
auto it = hwParser.cmdList.begin();
|
||||
for (uint32_t packetId = 0; packetId < tag.packetsUsed; packetId++) {
|
||||
for (uint32_t packetId = 0; packetId < tag.getPacketsUsed(); packetId++) {
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, packetId);
|
||||
}
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
|
||||
|
@ -201,15 +207,15 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr
|
|||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
|
||||
auto expectedEndAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode));
|
||||
|
||||
auto expectedCounterAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount);
|
||||
auto expectedCounterAddress = mockNode.getGpuAddress() + (TimestampPacketSizeControl::preferredPacketCount * 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode));
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, whenEndTagIsNotOneThenMarkAsCompleted) {
|
||||
TimestampPacketStorage timestampPacketStorage;
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packet = timestampPacketStorage.packets[0];
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
|
@ -266,7 +272,7 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov
|
|||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) {
|
||||
TimestampPacketStorage timestampPacketStorage;
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packet = timestampPacketStorage.packets[0];
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
|
@ -278,29 +284,29 @@ TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTim
|
|||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) {
|
||||
TimestampPacketStorage timestampPacketStorage;
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packets = timestampPacketStorage.packets;
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
timestampPacketStorage.packetsUsed = TimestampPacketSizeControl::preferredPacketCount - 1;
|
||||
timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1);
|
||||
|
||||
for (uint32_t i = 0; i < timestampPacketStorage.packetsUsed - 1; i++) {
|
||||
for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) {
|
||||
packets[i].contextEnd = 0;
|
||||
packets[i].globalEnd = 0;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
packets[timestampPacketStorage.packetsUsed - 1].contextEnd = 0;
|
||||
packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
|
||||
packets[timestampPacketStorage.packetsUsed - 1].globalEnd = 0;
|
||||
packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
|
||||
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
|
||||
MockMemoryManager memoryManager(executionEnvironment);
|
||||
MockTagAllocator<TimestampPacketStorage> allocator(0, &memoryManager, 1);
|
||||
MockTagAllocator<MockTimestampPacketStorage> allocator(0, &memoryManager, 1);
|
||||
|
||||
auto firstNode = allocator.getTag();
|
||||
auto i = 0u;
|
||||
|
@ -328,11 +334,11 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
|
|||
EXPECT_EQ(1u, packet.contextEnd);
|
||||
EXPECT_EQ(1u, packet.globalEnd);
|
||||
}
|
||||
EXPECT_EQ(1u, firstNode->tagForCpuAccess->packetsUsed);
|
||||
EXPECT_EQ(1u, firstNode->tagForCpuAccess->getPacketsUsed());
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) {
|
||||
TimestampPacketStorage timestampPacketStorage;
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
EXPECT_EQ(TimestampPacketSizeControl::preferredPacketCount * sizeof(timestampPacketStorage.packets[0]), sizeof(timestampPacketStorage.packets));
|
||||
|
||||
for (const auto &packet : timestampPacketStorage.packets) {
|
||||
|
@ -341,7 +347,7 @@ TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) {
|
|||
EXPECT_EQ(1u, packet.contextEnd);
|
||||
EXPECT_EQ(1u, packet.globalEnd);
|
||||
}
|
||||
EXPECT_EQ(1u, timestampPacketStorage.packetsUsed);
|
||||
EXPECT_EQ(1u, timestampPacketStorage.getPacketsUsed());
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferredTagPoolSizeThenReturnCorrectValue) {
|
||||
|
@ -361,12 +367,8 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc
|
|||
EXPECT_EQ(1u, csr.getPreferredTagPoolSize());
|
||||
|
||||
auto tag = csr.getTimestampPacketAllocator()->getTag();
|
||||
for (auto &packet : tag->tagForCpuAccess->packets) {
|
||||
packet.contextStart = 0;
|
||||
packet.globalStart = 0;
|
||||
packet.contextEnd = 0;
|
||||
packet.globalEnd = 0;
|
||||
}
|
||||
setTagToReadyState(tag);
|
||||
|
||||
EXPECT_TRUE(tag->tagForCpuAccess->isCompleted());
|
||||
EXPECT_FALSE(tag->canBeReleased());
|
||||
}
|
||||
|
@ -556,13 +558,13 @@ HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorr
|
|||
size_t sizeForNodeDependency = 0;
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(mockNode);
|
||||
|
||||
size_t expectedSize = mockNode.tagForCpuAccess->packetsUsed * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
|
||||
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
|
||||
|
||||
EXPECT_EQ(expectedSize, sizeForNodeDependency);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTagNodeWhenCpuAndGpuDependenciesCountAreEqualThenCanBeReleased) {
|
||||
TimestampPacketStorage tag;
|
||||
MockTimestampPacketStorage tag;
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
|
|
|
@ -2183,8 +2183,12 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
|
|||
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
|
||||
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
||||
|
||||
container.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u;
|
||||
container.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u;
|
||||
uint32_t data[4] = {static_cast<uint32_t>(container.getNode(0u)->tagForCpuAccess->getContextStartValue(0)),
|
||||
static_cast<uint32_t>(container.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0)),
|
||||
2, 2};
|
||||
|
||||
container.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
|
||||
|
||||
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
|
||||
|
||||
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
|
||||
|
@ -2192,8 +2196,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
|
|||
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
|
||||
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
||||
|
||||
subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u;
|
||||
subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u;
|
||||
data[0] = static_cast<uint32_t>(subdeviceContainer.getNode(0u)->tagForCpuAccess->getContextStartValue(0));
|
||||
data[1] = static_cast<uint32_t>(subdeviceContainer.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0));
|
||||
data[2] = 2;
|
||||
data[3] = 2;
|
||||
|
||||
subdeviceContainer.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
|
||||
|
||||
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
|
||||
|
||||
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
|
||||
|
@ -2203,8 +2212,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK
|
|||
EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
|
||||
EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
||||
|
||||
subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->globalEnd = 2u;
|
||||
subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->contextEnd = 2u;
|
||||
data[0] = static_cast<uint32_t>(subdeviceContainer.getNode(1u)->tagForCpuAccess->getContextStartValue(0));
|
||||
data[1] = static_cast<uint32_t>(subdeviceContainer.getNode(1u)->tagForCpuAccess->getGlobalStartValue(0));
|
||||
data[2] = 2;
|
||||
data[3] = 2;
|
||||
|
||||
subdeviceContainer.getNode(1u)->tagForCpuAccess->assignDataToAllTimestamps(0, data);
|
||||
|
||||
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
|
||||
|
||||
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
|
||||
|
|
|
@ -1078,36 +1078,33 @@ struct ProfilingTimestampPacketsTest : public ::testing::Test {
|
|||
ev->timestampPacketContainer = std::make_unique<MockTimestampContainer>();
|
||||
}
|
||||
|
||||
void addTimestampNode(int contextStart, int contextEnd, int globalStart, int globalEnd) {
|
||||
void addTimestampNode(uint32_t contextStart, uint32_t contextEnd, uint32_t globalStart, uint32_t globalEnd) {
|
||||
auto node = new MockTagNode<TimestampPacketStorage>();
|
||||
auto timestampPacketStorage = new TimestampPacketStorage();
|
||||
node->tagForCpuAccess = timestampPacketStorage;
|
||||
|
||||
timestampPacketStorage->packets[0].contextStart = contextStart;
|
||||
timestampPacketStorage->packets[0].contextEnd = contextEnd;
|
||||
timestampPacketStorage->packets[0].globalStart = globalStart;
|
||||
timestampPacketStorage->packets[0].globalEnd = globalEnd;
|
||||
uint32_t values[4] = {contextStart, globalStart, contextEnd, globalEnd};
|
||||
timestampPacketStorage->assignDataToAllTimestamps(0, values);
|
||||
|
||||
ev->timestampPacketContainer->add(node);
|
||||
}
|
||||
|
||||
void addTimestampNodeMultiOsContext(int globalStart[16], int globalEnd[16], int contextStart[16], int contextEnd[16], uint32_t size) {
|
||||
void addTimestampNodeMultiOsContext(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t contextStart[16], uint32_t contextEnd[16], uint32_t size) {
|
||||
auto node = new MockTagNode<TimestampPacketStorage>();
|
||||
auto timestampPacketStorage = new TimestampPacketStorage();
|
||||
timestampPacketStorage->packetsUsed = size;
|
||||
timestampPacketStorage->setPacketsUsed(size);
|
||||
|
||||
for (uint32_t i = 0u; i < timestampPacketStorage->packetsUsed; ++i) {
|
||||
timestampPacketStorage->packets[i].globalStart = globalStart[i];
|
||||
timestampPacketStorage->packets[i].globalEnd = globalEnd[i];
|
||||
timestampPacketStorage->packets[i].contextStart = contextStart[i];
|
||||
timestampPacketStorage->packets[i].contextEnd = contextEnd[i];
|
||||
for (uint32_t i = 0u; i < timestampPacketStorage->getPacketsUsed(); ++i) {
|
||||
uint32_t values[4] = {contextStart[i], globalStart[i], contextEnd[i], globalEnd[i]};
|
||||
|
||||
timestampPacketStorage->assignDataToAllTimestamps(i, values);
|
||||
}
|
||||
|
||||
node->tagForCpuAccess = timestampPacketStorage;
|
||||
ev->timestampPacketContainer->add(node);
|
||||
}
|
||||
|
||||
void initTimestampNodeMultiOsContextData(int globalStart[16], int globalEnd[16], uint32_t size) {
|
||||
void initTimestampNodeMultiOsContextData(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t size) {
|
||||
|
||||
for (uint32_t i = 0u; i < size; ++i) {
|
||||
globalStart[i] = 100;
|
||||
|
@ -1149,10 +1146,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneEleme
|
|||
}
|
||||
|
||||
TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCalcProfilingDataIsCalledThenCorrectedValuesAreReturned) {
|
||||
int globalStart[16] = {0};
|
||||
int globalEnd[16] = {0};
|
||||
int contextStart[16] = {0};
|
||||
int contextEnd[16] = {0};
|
||||
uint32_t globalStart[16] = {0};
|
||||
uint32_t globalEnd[16] = {0};
|
||||
uint32_t contextStart[16] = {0};
|
||||
uint32_t contextEnd[16] = {0};
|
||||
initTimestampNodeMultiOsContextData(globalStart, globalEnd, 16u);
|
||||
addTimestampNodeMultiOsContext(globalStart, globalEnd, contextStart, contextEnd, 16u);
|
||||
auto &device = reinterpret_cast<MockDevice &>(cmdQ->getDevice());
|
||||
|
@ -1165,15 +1162,15 @@ TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCal
|
|||
}
|
||||
|
||||
TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) {
|
||||
int globalStart0 = 20;
|
||||
int globalEnd0 = 51;
|
||||
int contextStart0 = 21;
|
||||
int contextEnd0 = 50;
|
||||
uint32_t globalStart0 = 20;
|
||||
uint32_t globalEnd0 = 51;
|
||||
uint32_t contextStart0 = 21;
|
||||
uint32_t contextEnd0 = 50;
|
||||
|
||||
int globalStart1 = globalStart0 - 1;
|
||||
int globalEnd1 = globalEnd0 + 1;
|
||||
int contextStart1 = contextStart0 - 1;
|
||||
int contextEnd1 = contextEnd0 + 1;
|
||||
uint32_t globalStart1 = globalStart0 - 1;
|
||||
uint32_t globalEnd1 = globalEnd0 + 1;
|
||||
uint32_t contextStart1 = contextStart0 - 1;
|
||||
uint32_t contextEnd1 = contextEnd0 + 1;
|
||||
|
||||
addTimestampNodeMultiOsContext(&globalStart0, &globalEnd0, &contextStart0, &contextEnd0, 1);
|
||||
addTimestampNodeMultiOsContext(&globalStart1, &globalEnd1, &contextStart1, &contextEnd1, 1);
|
||||
|
@ -1197,10 +1194,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCa
|
|||
auto &csr = device.getUltCommandStreamReceiver<DEFAULT_TEST_FAMILY_NAME>();
|
||||
csr.multiOsContextCapable = true;
|
||||
|
||||
int globalStart[16] = {0};
|
||||
int globalEnd[16] = {0};
|
||||
int contextStart[16] = {0};
|
||||
int contextEnd[16] = {0};
|
||||
uint32_t globalStart[16] = {0};
|
||||
uint32_t globalEnd[16] = {0};
|
||||
uint32_t contextStart[16] = {0};
|
||||
uint32_t contextEnd[16] = {0};
|
||||
for (int i = 0; i < 16; i++) {
|
||||
globalStart[i] = 2 * i;
|
||||
globalEnd[i] = 500 * i;
|
||||
|
|
|
@ -47,7 +47,7 @@ struct HwPerfCounter;
|
|||
struct HwTimeStamps;
|
||||
|
||||
template <typename TSize>
|
||||
struct TimestampPackets;
|
||||
class TimestampPackets;
|
||||
|
||||
template <typename T1>
|
||||
class TagAllocator;
|
||||
|
|
|
@ -1006,8 +1006,8 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
|
||||
if (blitProperties.outputTimestampPacket && profilingEnabled) {
|
||||
auto timestampContextStartGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextStart);
|
||||
auto timestampGlobalStartAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalStart);
|
||||
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket);
|
||||
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket);
|
||||
|
||||
EncodeStoreMMIO<GfxFamily>::encode(commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress);
|
||||
|
@ -1017,8 +1017,8 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
|
||||
if (blitProperties.outputTimestampPacket) {
|
||||
if (profilingEnabled) {
|
||||
auto timestampContextEndGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
auto timestampGlobalEndAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalEnd);
|
||||
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket);
|
||||
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket);
|
||||
|
||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, 0llu, newTaskCount, false, false);
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ template <typename TagType>
|
|||
struct TagNode;
|
||||
|
||||
template <typename TSize>
|
||||
struct TimestampPackets;
|
||||
class TimestampPackets;
|
||||
|
||||
struct BlitProperties;
|
||||
struct HardwareInfo;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -12,6 +12,7 @@
|
|||
#include "shared/source/helpers/aux_translation.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "pipe_control_args.h"
|
||||
|
@ -30,7 +31,8 @@ constexpr uint32_t preferredPacketCount = 16u;
|
|||
|
||||
#pragma pack(1)
|
||||
template <typename TSize>
|
||||
struct TimestampPackets {
|
||||
class TimestampPackets {
|
||||
public:
|
||||
struct Packet {
|
||||
TSize contextStart = 1u;
|
||||
TSize globalStart = 1u;
|
||||
|
@ -67,8 +69,27 @@ struct TimestampPackets {
|
|||
implicitGpuDependenciesCount = 0;
|
||||
}
|
||||
|
||||
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) {
|
||||
memcpy_s(&packets[packetIndex], sizeof(Packet), source, sizeof(Packet));
|
||||
}
|
||||
|
||||
size_t getGlobalStartOffset() const { return ptrDiff(&packets[0].globalStart, this); }
|
||||
size_t getContextStartOffset() const { return ptrDiff(&packets[0].contextStart, this); }
|
||||
size_t getContextEndOffset() const { return ptrDiff(&packets[0].contextEnd, this); }
|
||||
size_t getGlobalEndOffset() const { return ptrDiff(&packets[0].globalEnd, this); }
|
||||
size_t getImplicitGpuDependenciesCountOffset() const { return ptrDiff(&implicitGpuDependenciesCount, this); }
|
||||
|
||||
uint64_t getContextStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].contextStart); }
|
||||
uint64_t getGlobalStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].globalStart); }
|
||||
uint64_t getContextEndValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].contextEnd); }
|
||||
uint64_t getGlobalEndValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].globalEnd); }
|
||||
|
||||
void setPacketsUsed(uint32_t used) { packetsUsed = used; }
|
||||
uint32_t getPacketsUsed() const { return packetsUsed; }
|
||||
|
||||
uint32_t getImplicitGpuDependenciesCount() const { return implicitGpuDependenciesCount; }
|
||||
|
||||
protected:
|
||||
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
|
||||
uint32_t implicitGpuDependenciesCount = 0;
|
||||
uint32_t packetsUsed = 1;
|
||||
|
@ -110,11 +131,20 @@ struct TimestampPacketDependencies : public NonCopyableClass {
|
|||
|
||||
struct TimestampPacketHelper {
|
||||
static uint64_t getContextEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextEndOffset();
|
||||
}
|
||||
static uint64_t getContextStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextStartOffset();
|
||||
}
|
||||
static uint64_t getGlobalEndGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalEndOffset();
|
||||
}
|
||||
static uint64_t getGlobalStartGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalStartOffset();
|
||||
}
|
||||
|
||||
static uint64_t getGpuDependenciesCountGpuAddress(const TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount);
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
|
||||
}
|
||||
|
||||
static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices);
|
||||
|
@ -128,7 +158,7 @@ struct TimestampPacketHelper {
|
|||
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
|
||||
auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode);
|
||||
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) {
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->getPacketsUsed(); packetId++) {
|
||||
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
}
|
||||
|
@ -202,7 +232,7 @@ struct TimestampPacketHelper {
|
|||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
|
||||
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue