mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
Events workaround for L3Flush issue
Related-To: LOCI-2361
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
522d2550ee
commit
0c2800d37f
@@ -110,6 +110,43 @@ void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t num
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void programEventL3Flush(ze_event_handle_t hEvent,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
uint64_t eventAddress = event->getPacketAddress(device) + event->getSinglePacketSize();
|
||||
bool isTimestampEvent = event->isEventTimestampFlagSet();
|
||||
if (isTimestampEvent) {
|
||||
eventAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(event->getPacketsInUse() + partitionCount);
|
||||
} else {
|
||||
event->setPacketsInUse(event->getPacketsInUse() + 1);
|
||||
}
|
||||
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
if (partitionCount > 1) {
|
||||
args.workloadPartitionOffset = true;
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(*commandContainer.getCommandStream(),
|
||||
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
static_cast<uint32_t>(event->getSinglePacketSize()),
|
||||
true);
|
||||
}
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
eventAddress, Event::STATE_SIGNALED,
|
||||
commandContainer.getDevice()->getHardwareInfo(),
|
||||
args);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
@@ -228,9 +265,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
this->partitionCount = std::max(partitionCount, this->partitionCount);
|
||||
if (hEvent) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
if (isTimestampEvent && partitionCount > 1) {
|
||||
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(partitionCount);
|
||||
}
|
||||
if (L3FlushEnable) {
|
||||
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
|
||||
}
|
||||
}
|
||||
|
||||
if (neoDevice->getDebugger()) {
|
||||
|
||||
@@ -95,7 +95,7 @@ struct Event : _ze_event_handle_t {
|
||||
};
|
||||
|
||||
template <typename TagSizeT>
|
||||
class KernelTimestampsData : public NEO::TimestampPackets<TagSizeT> {
|
||||
class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT> {
|
||||
public:
|
||||
uint32_t getPacketsUsed() const { return packetsUsed; }
|
||||
void setPacketsUsed(uint32_t value) { packetsUsed = value; }
|
||||
@@ -139,7 +139,7 @@ struct EventImp : public Event {
|
||||
size_t getSinglePacketSize() const override { return NEO::TimestampPackets<TagSizeT>::getSinglePacketSize(); };
|
||||
ze_result_t hostEventSetValue(uint32_t eventValue) override;
|
||||
|
||||
std::unique_ptr<KernelTimestampsData<TagSizeT>[]> kernelTimestampsData;
|
||||
std::unique_ptr<KernelEventCompletionData<TagSizeT>[]> kernelEventCompletionData;
|
||||
|
||||
Device *device;
|
||||
int index;
|
||||
@@ -148,8 +148,9 @@ struct EventImp : public Event {
|
||||
protected:
|
||||
ze_result_t calculateProfilingData();
|
||||
ze_result_t queryStatusKernelTimestamp();
|
||||
ze_result_t queryStatusNonTimestamp();
|
||||
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
||||
void assignTimestampData(void *address);
|
||||
void assignKernelEventCompletionData(void *address);
|
||||
};
|
||||
|
||||
struct EventPool : _ze_event_pool_handle_t {
|
||||
|
||||
@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
|
||||
if (eventPool->isEventPoolTimestampFlagSet()) {
|
||||
event->setEventTimestampFlag(true);
|
||||
event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
|
||||
}
|
||||
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
|
||||
|
||||
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
|
||||
|
||||
@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
|
||||
globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
|
||||
globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
|
||||
contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
|
||||
contextEndTS = kernelTimestampsData[0].getContextEndValue(0);
|
||||
globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0);
|
||||
globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0);
|
||||
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
|
||||
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
|
||||
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
|
||||
if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
|
||||
globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
|
||||
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
|
||||
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
|
||||
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
|
||||
}
|
||||
if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
|
||||
contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
|
||||
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
|
||||
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
|
||||
}
|
||||
if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
|
||||
contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
|
||||
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
|
||||
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
|
||||
}
|
||||
if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
|
||||
globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
|
||||
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
|
||||
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -75,11 +75,12 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::assignTimestampData(void *address) {
|
||||
void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
|
||||
uint32_t packetsToCopy = 0;
|
||||
packetsToCopy = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
|
||||
kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
|
||||
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
|
||||
address = ptrOffset(address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
|
||||
}
|
||||
}
|
||||
@@ -87,11 +88,27 @@ void EventImp<TagSizeT>::assignTimestampData(void *address) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
|
||||
assignTimestampData(hostAddress);
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
uint32_t queryVal = Event::STATE_CLEARED;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
|
||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
||||
if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
|
||||
if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
uint32_t queryVal = Event::STATE_CLEARED;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
||||
if (kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
}
|
||||
@@ -102,7 +119,6 @@ ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryStatus() {
|
||||
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
|
||||
uint32_t queryVal = Event::STATE_CLEARED;
|
||||
|
||||
if (metricStreamer != nullptr) {
|
||||
*hostAddr = metricStreamer->getNotificationState();
|
||||
@@ -110,9 +126,9 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
|
||||
this->csr->downloadAllocations();
|
||||
if (isEventTimestampFlagSet()) {
|
||||
return queryStatusKernelTimestamp();
|
||||
} else {
|
||||
return queryStatusNonTimestamp();
|
||||
}
|
||||
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
|
||||
return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
@@ -130,7 +146,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
|
||||
}
|
||||
};
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed();
|
||||
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t j = 0; j < packetsToSet; j++) {
|
||||
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset());
|
||||
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset());
|
||||
@@ -139,7 +155,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
|
||||
baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
|
||||
}
|
||||
}
|
||||
assignTimestampData(hostAddress);
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -208,14 +224,12 @@ ze_result_t EventImp<TagSizeT>::reset() {
|
||||
if (isEventTimestampFlagSet()) {
|
||||
kernelCount = EventPacketsCount::maxKernelSplit;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
|
||||
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
|
||||
}
|
||||
hostEventSetValue(Event::STATE_INITIAL);
|
||||
resetPackets();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
} else {
|
||||
return hostEventSetValue(Event::STATE_INITIAL);
|
||||
}
|
||||
hostEventSetValue(Event::STATE_INITIAL);
|
||||
resetPackets();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
@@ -227,7 +241,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
assignTimestampData(hostAddress);
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
calculateProfilingData();
|
||||
|
||||
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
|
||||
@@ -288,10 +302,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
|
||||
packetId = static_cast<NEO::SubDevice *>(deviceImp->neoDevice)->getSubDeviceIndex();
|
||||
}
|
||||
|
||||
globalStartTs = kernelTimestampsData[timestampPacket].getGlobalStartValue(packetId);
|
||||
contextStartTs = kernelTimestampsData[timestampPacket].getContextStartValue(packetId);
|
||||
contextEndTs = kernelTimestampsData[timestampPacket].getContextEndValue(packetId);
|
||||
globalEndTs = kernelTimestampsData[timestampPacket].getGlobalEndValue(packetId);
|
||||
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);
|
||||
contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId);
|
||||
contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId);
|
||||
globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId);
|
||||
|
||||
queryTsEventAssignFunc(result.global.kernelStart, globalStartTs);
|
||||
queryTsEventAssignFunc(result.context.kernelStart, contextStartTs);
|
||||
@@ -305,37 +319,31 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::resetPackets() {
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
kernelTimestampsData[i].setPacketsUsed(1);
|
||||
kernelEventCompletionData[i].setPacketsUsed(1);
|
||||
}
|
||||
kernelCount = 1;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
uint32_t EventImp<TagSizeT>::getPacketsInUse() {
|
||||
if (isEventTimestampFlagSet()) {
|
||||
uint32_t packetsInUse = 0;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
packetsInUse += kernelTimestampsData[i].getPacketsUsed();
|
||||
};
|
||||
return packetsInUse;
|
||||
} else {
|
||||
return 1;
|
||||
uint32_t packetsInUse = 0;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
packetsInUse += kernelEventCompletionData[i].getPacketsUsed();
|
||||
}
|
||||
return packetsInUse;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
|
||||
kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
|
||||
};
|
||||
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
|
||||
uint64_t address = getGpuAddress(device);
|
||||
if (isEventTimestampFlagSet() && kernelCount > 1) {
|
||||
for (uint32_t i = 0; i < kernelCount - 1; i++) {
|
||||
address += kernelTimestampsData[i].getPacketsUsed() *
|
||||
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
|
||||
}
|
||||
for (uint32_t i = 0; i < kernelCount - 1; i++) {
|
||||
address += kernelEventCompletionData[i].getPacketsUsed() *
|
||||
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
@@ -640,15 +640,15 @@ TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventF
|
||||
}
|
||||
|
||||
TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) {
|
||||
EXPECT_NE(nullptr, event->kernelTimestampsData);
|
||||
EXPECT_NE(nullptr, event->kernelEventCompletionData);
|
||||
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
|
||||
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextEndValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalEndValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextEndValue(i));
|
||||
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i));
|
||||
}
|
||||
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
|
||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||
}
|
||||
|
||||
EXPECT_EQ(1u, event->kernelCount);
|
||||
@@ -692,7 +692,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
|
||||
}
|
||||
|
||||
TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) {
|
||||
EXPECT_NE(nullptr, event->kernelTimestampsData);
|
||||
EXPECT_NE(nullptr, event->kernelEventCompletionData);
|
||||
event->hostSignal();
|
||||
ze_result_t result = event->queryStatus();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
@@ -702,12 +702,12 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
||||
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
|
||||
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextStartValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalStartValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextEndValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalEndValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i));
|
||||
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i));
|
||||
}
|
||||
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
|
||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||
}
|
||||
EXPECT_EQ(1u, event->kernelCount);
|
||||
}
|
||||
@@ -799,7 +799,7 @@ TEST_F(EventQueryTimestampExpWithSubDevice, givenEventWhenQuerytimestampExpWithS
|
||||
uint32_t numPackets = 2;
|
||||
|
||||
for (uint32_t packetId = 0; packetId < numPackets; packetId++) {
|
||||
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
|
||||
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
|
||||
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
|
||||
}
|
||||
uint32_t pCount = 0;
|
||||
@@ -865,7 +865,7 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
|
||||
uint32_t pCount = 2;
|
||||
|
||||
for (uint32_t packetId = 0; packetId < pCount; packetId++) {
|
||||
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
|
||||
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
|
||||
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
|
||||
}
|
||||
|
||||
|
||||
@@ -143,6 +143,146 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
|
||||
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
|
||||
}
|
||||
|
||||
using CommandListAppendLaunchKernelL3Flush = Test<ModuleFixture>;
|
||||
// With EnableWalkerPartition set to 1 and a regular (non-timestamp) event, appending a
// kernel launch is expected to leave partitionCount above 1, to emit an
// MI_LOAD_REGISTER_IMM, and to emit at least one PIPE_CONTROL whose post-sync operation
// writes immediate data.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableWalkerPartition.set(1);

    Mock<::L0::Kernel> mockKernel;
    std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
    mockKernel.module = mockModule.get();
    mockKernel.setGroupSize(1, 1, 1);

    ze_group_count_t dispatchDims{8, 1, 1};
    auto cmdListUnderTest = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto status = cmdListUnderTest->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    // Host-visible pool without the kernel-timestamp flag.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalEventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc));
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<uint32_t>(pool.get(), &signalEventDesc, device));

    status = cmdListUnderTest->appendLaunchKernelWithParams(mockKernel.toHandle(), &dispatchDims, signalEvent->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto commandStream = cmdListUnderTest->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_LT(1u, cmdListUnderTest->partitionCount);

    auto loadRegisterIt = find<MI_LOAD_REGISTER_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), loadRegisterIt);

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(0u, pipeControls.size());

    // Count the PIPE_CONTROLs that carry a write-immediate-data post-sync operation.
    uint32_t immediateWriteCount = 0u;
    for (const auto &pipeControlIt : pipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        if (pipeControl->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            continue;
        }
        immediateWriteCount++;
    }
    ASSERT_LE(1u, immediateWriteCount);
}
|
||||
|
||||
// With EnableWalkerPartition set to 1 and an event from a kernel-timestamp pool, appending
// a kernel launch is expected to leave partitionCount above 1, to emit an
// MI_LOAD_REGISTER_IMM, and to emit at least one PIPE_CONTROL whose post-sync operation
// writes immediate data.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableWalkerPartition.set(1);

    Mock<::L0::Kernel> mockKernel;
    std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
    mockKernel.module = mockModule.get();
    mockKernel.setGroupSize(1, 1, 1);

    ze_group_count_t dispatchDims{8, 1, 1};
    auto cmdListUnderTest = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto status = cmdListUnderTest->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    // Host-visible pool that also requests kernel timestamps.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalEventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc));
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<uint32_t>(pool.get(), &signalEventDesc, device));

    status = cmdListUnderTest->appendLaunchKernelWithParams(mockKernel.toHandle(), &dispatchDims, signalEvent->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto commandStream = cmdListUnderTest->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_LT(1u, cmdListUnderTest->partitionCount);

    auto loadRegisterIt = find<MI_LOAD_REGISTER_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), loadRegisterIt);

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(0u, pipeControls.size());

    // Count the PIPE_CONTROLs that carry a write-immediate-data post-sync operation.
    uint32_t immediateWriteCount = 0u;
    for (const auto &pipeControlIt : pipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        if (pipeControl->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            continue;
        }
        immediateWriteCount++;
    }
    ASSERT_LE(1u, immediateWriteCount);
}
|
||||
|
||||
// With EnableWalkerPartition set to 0 and a regular event, appending a kernel launch is
// expected to keep partitionCount at 1 and to emit no MI_LOAD_REGISTER_IMM.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableWalkerPartition.set(0);

    Mock<::L0::Kernel> mockKernel;
    std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
    mockKernel.module = mockModule.get();
    mockKernel.setGroupSize(1, 1, 1);

    ze_group_count_t dispatchDims{8, 1, 1};
    auto cmdListUnderTest = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto status = cmdListUnderTest->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    // Host-visible pool without the kernel-timestamp flag.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;

    std::unique_ptr<L0::EventPool> pool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc));
    std::unique_ptr<L0::Event> signalEvent(L0::Event::create<uint32_t>(pool.get(), &signalEventDesc, device));

    status = cmdListUnderTest->appendLaunchKernelWithParams(mockKernel.toHandle(), &dispatchDims, signalEvent->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    auto commandStream = cmdListUnderTest->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_EQ(1u, cmdListUnderTest->partitionCount);

    // Without partitioning no register load should be present in the stream.
    auto loadRegisterIt = find<MI_LOAD_REGISTER_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_EQ(parsedCommands.end(), loadRegisterIt);
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpCore) {
|
||||
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user