Revert "SW WA to add PIPE_CONTROL with dcFlush enabled when event scope..."

This reverts commit c051495eb8.

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2021-09-06 15:39:17 +00:00
committed by Compute-Runtime-Automation
parent 1db3f750ce
commit f727d26aa0
4 changed files with 70 additions and 94 deletions

View File

@@ -231,24 +231,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->partitionCount = std::max(partitionCount, this->partitionCount);
if (hEvent) {
auto event = Event::fromHandle(hEvent);
if (partitionCount > 1) {
if (isTimestampEvent && partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
if (L3FlushEnable) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
auto &hwHelper = this->device->getHwHelper();
eventAddress = event->getPacketAddress(this->device) + hwHelper.getSingleTimestampPacketSize();
event->setPacketsInUse(event->getPacketsInUse() + 1);
NEO::PipeControlArgs args;
args.dcFlushEnable = L3FlushEnable;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
eventAddress, Event::STATE_SIGNALED,
commandContainer.getDevice()->getHardwareInfo(),
args);
}
}
if (neoDevice->getDebugger()) {

View File

@@ -95,7 +95,7 @@ struct Event : _ze_event_handle_t {
};
template <typename TagSizeT>
class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT> {
class KernelTimestampsData : public NEO::TimestampPackets<TagSizeT> {
public:
uint32_t getPacketsUsed() const { return packetsUsed; }
void setPacketsUsed(uint32_t value) { packetsUsed = value; }
@@ -139,7 +139,7 @@ struct EventImp : public Event {
size_t getSinglePacketSize() const override { return NEO::TimestampPackets<TagSizeT>::getSinglePacketSize(); };
ze_result_t hostEventSetValue(uint32_t eventValue) override;
std::unique_ptr<KernelEventCompletionData<TagSizeT>[]> kernelEventCompletionData;
std::unique_ptr<KernelTimestampsData<TagSizeT>[]> kernelTimestampsData;
Device *device;
int index;
@@ -148,9 +148,8 @@ struct EventImp : public Event {
protected:
ze_result_t calculateProfilingData();
ze_result_t queryStatusKernelTimestamp();
ze_result_t queryStatusNonTimestamp();
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
void assignKernelEventCompletionData(void *address);
void assignTimestampData(void *address);
};
struct EventPool : _ze_event_pool_handle_t {

View File

@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
if (eventPool->isEventPoolTimestampFlagSet()) {
event->setEventTimestampFlag(true);
event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
}
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0);
globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0);
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
contextEndTS = kernelTimestampsData[0].getContextEndValue(0);
for (uint32_t i = 0; i < kernelCount; i++) {
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
}
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
}
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
}
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
}
}
}
@@ -75,12 +75,11 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
}
template <typename TagSizeT>
void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
void EventImp<TagSizeT>::assignTimestampData(void *address) {
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCopy = 0;
packetsToCopy = kernelEventCompletionData[i].getPacketsUsed();
uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
address = ptrOffset(address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
}
}
@@ -88,27 +87,11 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
assignTimestampData(hostAddress);
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) {
return ZE_RESULT_NOT_READY;
}
}
}
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
if (kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) {
if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
return ZE_RESULT_NOT_READY;
}
}
@@ -119,6 +102,7 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatus() {
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
if (metricStreamer != nullptr) {
*hostAddr = metricStreamer->getNotificationState();
@@ -126,9 +110,9 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
this->csr->downloadAllocations();
if (isEventTimestampFlagSet()) {
return queryStatusKernelTimestamp();
} else {
return queryStatusNonTimestamp();
}
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
@@ -146,7 +130,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
}
};
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++) {
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset());
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset());
@@ -155,7 +139,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
}
}
assignKernelEventCompletionData(hostAddress);
assignTimestampData(hostAddress);
return ZE_RESULT_SUCCESS;
}
@@ -224,12 +208,14 @@ ze_result_t EventImp<TagSizeT>::reset() {
if (isEventTimestampFlagSet()) {
kernelCount = EventPacketsCount::maxKernelSplit;
for (uint32_t i = 0; i < kernelCount; i++) {
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
return ZE_RESULT_SUCCESS;
} else {
return hostEventSetValue(Event::STATE_INITIAL);
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
@@ -241,7 +227,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
return ZE_RESULT_NOT_READY;
}
assignKernelEventCompletionData(hostAddress);
assignTimestampData(hostAddress);
calculateProfilingData();
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
@@ -280,7 +266,7 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
}
if ((*pCount == 0) ||
(*pCount > kernelEventCompletionData[timestampPacket].getPacketsUsed())) {
(*pCount > kernelTimestampsData[timestampPacket].getPacketsUsed())) {
*pCount = this->getPacketsInUse();
return ZE_RESULT_SUCCESS;
}
@@ -292,10 +278,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
memcpy_s(&timestampFieldForWriting, sizeof(uint64_t), static_cast<void *>(&timestampFieldToCopy), sizeof(uint64_t));
};
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);
contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId);
contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId);
globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId);
globalStartTs = kernelTimestampsData[timestampPacket].getGlobalStartValue(packetId);
contextStartTs = kernelTimestampsData[timestampPacket].getContextStartValue(packetId);
contextEndTs = kernelTimestampsData[timestampPacket].getContextEndValue(packetId);
globalEndTs = kernelTimestampsData[timestampPacket].getGlobalEndValue(packetId);
queryTsEventAssignFunc(result.global.kernelStart, globalStartTs);
queryTsEventAssignFunc(result.context.kernelStart, contextStartTs);
@@ -309,31 +295,37 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
template <typename TagSizeT>
void EventImp<TagSizeT>::resetPackets() {
for (uint32_t i = 0; i < kernelCount; i++) {
kernelEventCompletionData[i].setPacketsUsed(1);
kernelTimestampsData[i].setPacketsUsed(1);
}
kernelCount = 1;
}
template <typename TagSizeT>
uint32_t EventImp<TagSizeT>::getPacketsInUse() {
uint32_t packetsInUse = 0;
for (uint32_t i = 0; i < kernelCount; i++) {
packetsInUse += kernelEventCompletionData[i].getPacketsUsed();
if (isEventTimestampFlagSet()) {
uint32_t packetsInUse = 0;
for (uint32_t i = 0; i < kernelCount; i++) {
packetsInUse += kernelTimestampsData[i].getPacketsUsed();
};
return packetsInUse;
} else {
return 1;
}
return packetsInUse;
}
template <typename TagSizeT>
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
}
kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
};
template <typename TagSizeT>
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
uint64_t address = getGpuAddress(device);
for (uint32_t i = 0; i < kernelCount - 1; i++) {
address += kernelEventCompletionData[i].getPacketsUsed() *
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
if (isEventTimestampFlagSet() && kernelCount > 1) {
for (uint32_t i = 0; i < kernelCount - 1; i++) {
address += kernelTimestampsData[i].getPacketsUsed() *
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
}
}
return address;
}

View File

@@ -640,15 +640,15 @@ TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventF
}
TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) {
EXPECT_NE(nullptr, event->kernelEventCompletionData);
EXPECT_NE(nullptr, event->kernelTimestampsData);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalEndValue(i));
}
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
@@ -692,7 +692,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
}
TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) {
EXPECT_NE(nullptr, event->kernelEventCompletionData);
EXPECT_NE(nullptr, event->kernelTimestampsData);
event->hostSignal();
ze_result_t result = event->queryStatus();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -702,12 +702,12 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalEndValue(i));
}
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
}
@@ -828,7 +828,7 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
uint32_t pCount = 2;
for (uint32_t packetId = 0; packetId < pCount; packetId++) {
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
}