performance: don't pull host allocation each time in the loop 2

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-27 13:56:54 +00:00
committed by Compute-Runtime-Automation
parent 1652d82081
commit a7ff04e28f

View File

@@ -467,11 +467,13 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
}
}
auto hostAddresss = getHostAddress();
uint32_t packets = 0;
for (uint32_t i = 0; i < this->kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++, packets++) {
if (castToUint64(baseHostAddr) >= castToUint64(ptrOffset(getHostAddress(), totalEventSize))) {
if (castToUint64(baseHostAddr) >= castToUint64(ptrOffset(hostAddresss, totalEventSize))) {
break;
}
copyDataToEventAlloc(ptrOffset(baseHostAddr, contextStartOffset), baseGpuAddr + contextStartOffset, sizeof(TagSizeT), timestampStart);