diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 6558c8dd04..203655f698 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -343,7 +343,7 @@ struct CommandListCoreFamily : public CommandListImp { void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds); size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); uint64_t getInputBufferSize(NEO::ImageType imageType, uint32_t bufferRowPitch, uint32_t bufferSlicePitch, const ze_image_region_t *region); - MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload); + MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload); size_t getAllocationOffsetForAppendBlitFill(void *ptr, NEO::GraphicsAllocation &gpuAllocation); uint32_t getRegionOffsetForAppendMemoryCopyBlitRegion(AlignedAllocationData *allocationData); void handlePostSubmissionState(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index dbea3675e2..23b83e0a41 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -838,7 +838,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemoryExt(z uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, srcRowPitch, srcSlicePitch, pDstRegion); - auto allocationStruct = getAlignedAllocationData(this->device, srcPtr, bufferSize, true, false); + auto allocationStruct = getAlignedAllocationData(this->device, false, srcPtr, bufferSize, true, false); if (allocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -1036,7 +1036,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemoryExt(voi uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, destRowPitch, destSlicePitch, pSrcRegion); - auto allocationStruct = getAlignedAllocationData(this->device, dstPtr, bufferSize, false, false); + auto allocationStruct = getAlignedAllocationData(this->device, false, dstPtr, bufferSize, false, false); if (allocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -1463,6 +1463,14 @@ ze_result_t CommandListCoreFamily::executeMemAdvise(ze_device_han return ZE_RESULT_SUCCESS; } +static inline void builtinSetArgCopy(Kernel *builtinKernel, uint32_t argIndex, void *argPtr, NEO::GraphicsAllocation *allocation) { + if (allocation) { + builtinKernel->setArgBufferWithAlloc(argIndex, *reinterpret_cast(argPtr), allocation, nullptr); + } else { + builtinKernel->setArgumentValue(argIndex, sizeof(uintptr_t *), argPtr); + } +} + template ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, @@ -1495,8 +1503,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(v return ret; } - builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast(dstPtr), dstPtrAlloc, nullptr); - builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast(srcPtr), srcPtrAlloc, nullptr); + builtinSetArgCopy(builtinKernel, 0, dstPtr, dstPtrAlloc); + builtinSetArgCopy(builtinKernel, 1, srcPtr, srcPtrAlloc); uint64_t elems = size / elementSize; builtinKernel->setArgumentValue(2, sizeof(elems), &elems); @@ -1506,12 +1514,15 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(v uint32_t groups = static_cast((size + ((static_cast(groupSizeX) * elementSize) - 1)) / (static_cast(groupSizeX) * elementSize)); ze_group_count_t dispatchKernelArgs{groups, 1u, 1u}; - auto dstAllocationType = dstPtrAlloc->getAllocationType(); launchParams.isBuiltInKernel = true; - launchParams.isDestinationAllocationInSystemMemory = this->isUsingSystemAllocation(dstAllocationType); - - if constexpr (checkIfAllocationImportedRequired()) { - launchParams.isDestinationAllocationImported = this->isAllocationImported(dstPtrAlloc, device->getDriverHandle()->getSvmAllocsManager()); + if (dstPtrAlloc) { + auto dstAllocationType = dstPtrAlloc->getAllocationType(); + launchParams.isDestinationAllocationInSystemMemory = this->isUsingSystemAllocation(dstAllocationType); + if constexpr (checkIfAllocationImportedRequired()) { + launchParams.isDestinationAllocationImported = this->isAllocationImported(dstPtrAlloc, device->getDriverHandle()->getSvmAllocsManager()); + } + } else { + launchParams.isDestinationAllocationInSystemMemory = true; } return CommandListCoreFamily::appendLaunchKernelSplit(builtinKernel, dispatchKernelArgs, signalEvent, launchParams); } @@ -1529,16 +1540,23 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(uintptr_t uint64_t srcOffset, uint64_t size, Event *signalEvent) { - dstOffset += ptrDiff(dstPtr, dstPtrAlloc->getGpuAddress()); - srcOffset += ptrDiff(srcPtr, srcPtrAlloc->getGpuAddress()); + if (dstPtrAlloc) { + dstOffset += ptrDiff(dstPtr, dstPtrAlloc->getGpuAddress()); + } + if (srcPtrAlloc) { + srcOffset += ptrDiff(srcPtr, srcPtrAlloc->getGpuAddress()); + } auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation(); - auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation); + auto blitProperties = NEO::BlitProperties::constructPropertiesForSystemCopy(dstPtrAlloc, srcPtrAlloc, dstPtr, srcPtr, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation); blitProperties.computeStreamPartitionCount = this->partitionCount; blitProperties.highPriority = isHighPriorityImmediateCmdList(); - - commandContainer.addToResidencyContainer(dstPtrAlloc); - commandContainer.addToResidencyContainer(srcPtrAlloc); + if (dstPtrAlloc) { + commandContainer.addToResidencyContainer(dstPtrAlloc); + } + if (srcPtrAlloc) { + commandContainer.addToResidencyContainer(srcPtrAlloc); + } commandContainer.addToResidencyContainer(clearColorAllocation); size_t nBlitsPerRow = NEO::BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(blitProperties.copySize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); @@ -1762,6 +1780,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, CmdListMemoryCopyParams &memoryCopyParams) { NEO::Device *neoDevice = device->getNEODevice(); + bool sharedSystemEnabled = ((neoDevice->areSharedSystemAllocationsAllowed()) && (NEO::debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.get() == 1)); + uint32_t callId = 0; if (NEO::debugManager.flags.EnableSWTags.get()) { callId = neoDevice->getRootDeviceEnvironment().tagsManager->incrementAndGetCurrentCallCount(); @@ -1772,14 +1792,26 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, callId); } - auto dstAllocationStruct = getAlignedAllocationData(this->device, dstptr, size, false, isCopyOffloadEnabled()); - auto srcAllocationStruct = getAlignedAllocationData(this->device, srcptr, size, true, isCopyOffloadEnabled()); + auto dstAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, dstptr, size, false, isCopyOffloadEnabled()); + auto srcAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, srcptr, size, true, isCopyOffloadEnabled()); - if (dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) { + if ((dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) && (sharedSystemEnabled == false)) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } - memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc); + if ((dstAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) { + appendMemAdvise(device, reinterpret_cast(dstAllocationStruct.alignedAllocationPtr), size, static_cast(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION)); + } + + if ((srcAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) { + appendMemAdvise(device, reinterpret_cast(srcAllocationStruct.alignedAllocationPtr), size, static_cast(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION)); + } + + if (dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) { + memoryCopyParams.copyOffloadAllowed = true; + } else { + memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc); + } const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed); const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled; @@ -1997,8 +2029,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d size_t dstSize = this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch); size_t srcSize = this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch); - auto dstAllocationStruct = getAlignedAllocationData(this->device, dstPtr, dstSize, false, isCopyOffloadEnabled()); - auto srcAllocationStruct = getAlignedAllocationData(this->device, srcPtr, srcSize, true, isCopyOffloadEnabled()); + auto dstAllocationStruct = getAlignedAllocationData(this->device, false, dstPtr, dstSize, false, isCopyOffloadEnabled()); + auto srcAllocationStruct = getAlignedAllocationData(this->device, false, srcPtr, srcSize, true, isCopyOffloadEnabled()); UNRECOVERABLE_IF(srcSlicePitch && srcPitch == 0); Vec3 srcSize3 = {srcPitch ? srcPitch : srcRegion->width + srcRegion->originX, @@ -2251,6 +2283,14 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const voi return ZE_RESULT_SUCCESS; } +static inline void builtinSetArgFill(Kernel *builtinKernel, uint32_t argIndex, uintptr_t argPtr, NEO::GraphicsAllocation *allocation) { + if (allocation) { + builtinKernel->setArgBufferWithAlloc(argIndex, argPtr, allocation, nullptr); + } else { + builtinKernel->setArgumentValue(argIndex, sizeof(argPtr), &argPtr); + } +} + template ze_result_t CommandListCoreFamily::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, CmdListKernelLaunchParams &launchParams) { @@ -2264,7 +2304,7 @@ ze_result_t CommandListCoreFamily::appendUnalignedFillKernel(bool builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); ze_group_count_t dispatchKernelRemainderArgs{static_cast(unalignedSize / groupSizeX), 1u, 1u}; uint32_t value = *(reinterpret_cast(pattern)); - builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr); + builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinKernel->setArgumentValue(2, sizeof(value), &value); @@ -2292,6 +2332,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, memoryCopyParams.copyOffloadAllowed = isCopyOffloadEnabled(); NEO::Device *neoDevice = device->getNEODevice(); + bool sharedSystemEnabled = ((neoDevice->areSharedSystemAllocationsAllowed()) && (NEO::debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.get() == 1)); uint32_t callId = 0; if (NEO::debugManager.flags.EnableSWTags.get()) { callId = neoDevice->getRootDeviceEnvironment().tagsManager->incrementAndGetCurrentCallCount(); @@ -2340,18 +2381,22 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } } else { - if (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS) { + if ((sharedSystemEnabled == false) && (neoDevice->areSharedSystemAllocationsAllowed() == false) && (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS)) { + // first two conditions, above are default, and each may be turned true only with debug variables return ZE_RESULT_ERROR_INVALID_ARGUMENT; - } else { - hostPointerNeedsFlush = true; } + hostPointerNeedsFlush = true; } - auto dstAllocation = this->getAlignedAllocationData(this->device, ptr, size, false, false); - if (dstAllocation.alloc == nullptr) { + auto dstAllocation = this->getAlignedAllocationData(this->device, sharedSystemEnabled, ptr, size, false, false); + if ((dstAllocation.alloc == nullptr) && (sharedSystemEnabled == false)) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } + if ((dstAllocation.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) { + appendMemAdvise(device, reinterpret_cast(dstAllocation.alignedAllocationPtr), size, static_cast(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION)); + } + auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto builtin = (patternSize == 1) @@ -2362,8 +2407,10 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, launchParams.isBuiltInKernel = true; launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush; - if constexpr (checkIfAllocationImportedRequired()) { - launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAllocation.alloc, device->getDriverHandle()->getSvmAllocsManager()); + if (dstAllocation.alloc) { + if constexpr (checkIfAllocationImportedRequired()) { + launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAllocation.alloc, device->getDriverHandle()->getSvmAllocsManager()); + } } CmdListFillKernelArguments fillArguments = {}; setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel); @@ -2400,7 +2447,8 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, uint32_t value = 0; memset(&value, *reinterpret_cast(pattern), 4); - builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr); + + builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgumentValue(1, sizeof(fillArguments.mainOffset), &fillArguments.mainOffset); builtinKernel->setArgumentValue(2, sizeof(value), &value); @@ -2447,7 +2495,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, patternAllocOffset += patternSizeToCopy; } while (patternAllocOffset < patternAllocationSize); if (fillArguments.leftRemainingBytes == 0) { - builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr); + builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast(patternGfxAllocPtr), patternGfxAlloc, nullptr); builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls); @@ -2468,9 +2516,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, builtinKernelRemainder->setGroupSize(static_cast(fillArguments.mainGroupSize), 1, 1); ze_group_count_t dispatchKernelArgs{static_cast(fillArguments.groups), 1u, 1u}; - builtinKernelRemainder->setArgBufferWithAlloc(0, - dstAllocation.alignedAllocationPtr, - dstAllocation.alloc, nullptr); + builtinSetArgFill(builtinKernelRemainder, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernelRemainder->setArgumentValue(1, sizeof(dstOffsetRemainder), &dstOffsetRemainder); @@ -2496,9 +2542,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, builtinKernelRemainder->setGroupSize(fillArguments.rightRemainingBytes, 1u, 1u); ze_group_count_t dispatchKernelArgs{1u, 1u, 1u}; - builtinKernelRemainder->setArgBufferWithAlloc(0, - dstAllocation.alignedAllocationPtr, - dstAllocation.alloc, nullptr); + builtinSetArgFill(builtinKernelRemainder, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernelRemainder->setArgumentValue(1, sizeof(dstOffsetRemainder), &dstOffsetRemainder); @@ -2545,6 +2589,10 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, template ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, Event *signalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { + + NEO::Device *neoDevice = device->getNEODevice(); + bool sharedSystemEnabled = neoDevice->areSharedSystemAllocationsAllowed(); + if (this->maxFillPaternSizeForCopyEngine < patternSize) { return ZE_RESULT_ERROR_INVALID_SIZE; } else { @@ -2568,6 +2616,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons size, neoDevice->getRootDeviceIndex(), nullptr); + DriverHandleImp *driverHandle = static_cast(device->getDriverHandle()); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) { @@ -2575,20 +2624,32 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), nullptr, nullptr); } - if (gpuAllocation == nullptr) { + if ((gpuAllocation == nullptr) && (sharedSystemEnabled == false)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } - auto offset = getAllocationOffsetForAppendBlitFill(ptr, *gpuAllocation); - - commandContainer.addToResidencyContainer(gpuAllocation); uint32_t patternToCommand[4] = {}; memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize); + NEO::BlitProperties blitProperties; + bool useAdditionalTimestamp = false; + if (gpuAllocation) { + auto offset = getAllocationOffsetForAppendBlitFill(ptr, *gpuAllocation); + + commandContainer.addToResidencyContainer(gpuAllocation); + + blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset); + size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + useAdditionalTimestamp = nBlits > 1; + } else if (sharedSystemEnabled == true) { + if (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1) { + appendMemAdvise(device, ptr, size, static_cast(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION)); + } + blitProperties = NEO::BlitProperties::constructPropertiesForSystemMemoryFill(reinterpret_cast(ptr), size, patternToCommand, patternSize, 0ul); + } else { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } - auto blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset); - size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); - bool useAdditionalTimestamp = nBlits > 1; if (useAdditionalBlitProperties) { setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp); } @@ -2675,7 +2736,7 @@ inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::Im } template -inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload) { +inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocationData(Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload) { NEO::SvmAllocationData *allocData = nullptr; void *ptr = const_cast(buffer); bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, @@ -2697,16 +2758,20 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll // get offset from base of allocation to arg address offset += reinterpret_cast(ptr) - reinterpret_cast(alloc->getUnderlyingBuffer()); } else { - alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed, copyOffload); - if (alloc == nullptr) { - return {0u, 0, nullptr, false}; - } - alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); - if (alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) { - auto hostAllocCpuPtr = reinterpret_cast(alloc->getUnderlyingBuffer()); - hostAllocCpuPtr = alignDown(hostAllocCpuPtr, NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); - auto allignedPtrOffset = sourcePtr - hostAllocCpuPtr; - alignedPtr = ptrOffset(alignedPtr, allignedPtrOffset); + if (sharedSystemEnabled) { + return {reinterpret_cast(ptr), 0, nullptr, true}; + } else { + alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed, copyOffload); + if (alloc == nullptr) { + return {0u, 0, nullptr, false}; + } + alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); + if (alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) { + auto hostAllocCpuPtr = reinterpret_cast(alloc->getUnderlyingBuffer()); + hostAllocCpuPtr = alignDown(hostAllocCpuPtr, NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); + auto allignedPtrOffset = sourcePtr - hostAllocCpuPtr; + alignedPtr = ptrOffset(alignedPtr, allignedPtrOffset); + } } } @@ -3366,7 +3431,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false)); - auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false, false); + auto allocationStruct = getAlignedAllocationData(this->device, false, dstptr, sizeof(uint64_t), false, false); if (allocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -3419,7 +3484,7 @@ ze_result_t CommandListCoreFamily::appendQueryKernelTimestamps( const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - auto dstPtrAllocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents, false, false); + auto dstPtrAllocationStruct = getAlignedAllocationData(this->device, false, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents, false, false); if (dstPtrAllocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -3465,7 +3530,7 @@ ze_result_t CommandListCoreFamily::appendQueryKernelTimestamps( builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::queryKernelTimestamps); builtinKernel->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestampsValue); } else { - auto pOffsetAllocationStruct = getAlignedAllocationData(this->device, pOffsets, sizeof(size_t) * numEvents, false, false); + auto pOffsetAllocationStruct = getAlignedAllocationData(this->device, false, pOffsets, sizeof(size_t) * numEvents, false, false); if (pOffsetAllocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -4082,7 +4147,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, signalEvent = Event::fromHandle(signalEventHandle); } - auto srcAllocationStruct = getAlignedAllocationData(this->device, ptr, sizeof(uint32_t), true, false); + auto srcAllocationStruct = getAlignedAllocationData(this->device, false, ptr, sizeof(uint32_t), true, false); if (srcAllocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -4145,7 +4210,7 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc auto descriptor = reinterpret_cast(desc); size_t bufSize = sizeof(uint64_t); - auto dstAllocationStruct = getAlignedAllocationData(this->device, ptr, bufSize, false, false); + auto dstAllocationStruct = getAlignedAllocationData(this->device, false, ptr, bufSize, false, false); if (dstAllocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -4646,7 +4711,15 @@ bool CommandListCoreFamily::isDeviceToHostCopyEventFenceRequired( template bool CommandListCoreFamily::isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyEngineOperation) const { - return (copyEngineOperation && (srcAllocation->isAllocatedInLocalMemoryPool() && !dstAllocation->isAllocatedInLocalMemoryPool())); + bool srcInLocalPool = false; + bool dstInLocalPool = false; + if (srcAllocation) { + srcInLocalPool = srcAllocation->isAllocatedInLocalMemoryPool(); + } + if (dstAllocation) { + dstInLocalPool = dstAllocation->isAllocatedInLocalMemoryPool(); + } + return (copyEngineOperation && (srcInLocalPool && !dstInLocalPool)); } template diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index f30031f9f1..29c2f6e851 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -404,6 +404,9 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::validateDispatchFlags(bool bool AppendFillFixture::MockDriverFillHandle::findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData *&allocData) { + if ((size >= 15) && (size <= 17)) { + return false; + } mockAllocation.reset(new NEO::MockGraphicsAllocation(const_cast(buffer), size)); data.gpuAllocations.addAllocation(mockAllocation.get()); allocData = &data; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index ecda9d6558..a2ae0353e9 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -687,8 +687,8 @@ class MockCommandListCoreFamily : public CommandListCoreFamily { uint32_t sizePerHwThread), (kernel, sizePerHwThread)); - AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { - return L0::CommandListCoreFamily::getAlignedAllocationData(device, buffer, bufferSize, allowHostCopy, copyOffload); + AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { + return L0::CommandListCoreFamily::getAlignedAllocationData(device, sharedSystemEnabled, buffer, bufferSize, allowHostCopy, copyOffload); } ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index f603052e39..9298e74003 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -42,7 +42,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily>() {} MockCommandListHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily>(), failOnFirstCopy(failOnFirst) {} - AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { + AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { getAlignedAllocationCalledTimes++; if (buffer && !failAlignedAlloc) { return {0, 0, &alignedAlloc, true}; @@ -1536,7 +1536,7 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily public: MockCommandListForRegionSize() : WhiteBox<::L0::CommandListCoreFamily>() {} - AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { + AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { return {0, 0, &mockAllocationPtr, true}; } ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 31def4e2a3..da45b949b7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -223,7 +223,7 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationCalledWithInvalidPtrTh size_t cmdListHostPtrSize = MemoryConstants::pageSize; void *cmdListHostBuffer = reinterpret_cast(0x1234); AlignedAllocationData outData = {}; - outData = commandList->getAlignedAllocationData(device, cmdListHostBuffer, cmdListHostPtrSize, false, false); + outData = commandList->getAlignedAllocationData(device, false, cmdListHostBuffer, cmdListHostPtrSize, false, false); EXPECT_EQ(nullptr, outData.alloc); } @@ -288,7 +288,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenGettingAlignedAl void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); size_t expectedOffset = ptrDiff(startMemory, baseAddress); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false); ASSERT_NE(nullptr, outData.alloc); auto firstAlloc = outData.alloc; auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); @@ -303,7 +303,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenGettingAlignedAl expectedGpuAddress = ptrOffset(expectedGpuAddress, alignedOffset); EXPECT_EQ(outData.offset + offset, expectedOffset); - outData = commandList->getAlignedAllocationData(device, offsetMemory, 4u, false, false); + outData = commandList->getAlignedAllocationData(device, false, offsetMemory, 4u, false, false); ASSERT_NE(nullptr, outData.alloc); EXPECT_EQ(firstAlloc, outData.alloc); EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); @@ -329,7 +329,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenRemoveHostPtrAll size_t cmdListHostPtrSize = MemoryConstants::pageSize; void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, cmdListHostBuffer, cmdListHostPtrSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, cmdListHostBuffer, cmdListHostPtrSize, false, false); ASSERT_NE(nullptr, outData.alloc); for (const auto &engine : engines) { @@ -1558,8 +1558,8 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationWhenInternalMemWithinD commandList->initialize(myDevice.get(), NEO::EngineGroupType::copy, 0u); auto buffer = std::make_unique(0x100); - auto outData1 = commandList->getAlignedAllocationData(device, buffer.get(), 0x100, true, false); - auto outData2 = commandList->getAlignedAllocationData(device, &buffer.get()[5], 0x1, true, false); + auto outData1 = commandList->getAlignedAllocationData(device, false, buffer.get(), 0x100, true, false); + auto outData2 = commandList->getAlignedAllocationData(device, false, &buffer.get()[5], 0x1, true, false); EXPECT_NE(outData1.alloc, outData2.alloc); driverHandle->getMemoryManager()->freeGraphicsMemory(outData1.alloc); driverHandle->getMemoryManager()->freeGraphicsMemory(outData2.alloc); @@ -1572,8 +1572,8 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationWhenExternalMemWithinD commandList->initialize(myDevice.get(), NEO::EngineGroupType::copy, 0u); auto buffer = std::make_unique(0x100); - auto outData1 = commandList->getAlignedAllocationData(device, buffer.get(), 0x100, true, false); - auto outData2 = commandList->getAlignedAllocationData(device, &buffer.get()[5], 0x1, true, false); + auto outData1 = commandList->getAlignedAllocationData(device, false, buffer.get(), 0x100, true, false); + auto outData2 = commandList->getAlignedAllocationData(device, false, &buffer.get()[5], 0x1, true, false); EXPECT_EQ(outData1.alloc, outData2.alloc); driverHandle->getMemoryManager()->freeGraphicsMemory(outData1.alloc); commandList->hostPtrMap.clear(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 798d7a7b8b..49e1fee688 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -946,7 +946,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingA size_t offsetSize = 20; void *offsetPointer = ptrOffset(importPointer, allocOffset); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, importPointer, importSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, importPointer, importSize, false, false); auto gpuBaseAddress = static_cast(hostAllocation->getGpuAddress()); auto expectedAlignedAddress = alignDown(gpuBaseAddress, NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); size_t expectedOffset = gpuBaseAddress - expectedAlignedAddress; @@ -955,7 +955,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingA EXPECT_EQ(hostAllocation, outData.alloc); EXPECT_EQ(expectedOffset, outData.offset); - outData = commandList->getAlignedAllocationData(device, offsetPointer, offsetSize, false, false); + outData = commandList->getAlignedAllocationData(device, false, offsetPointer, offsetSize, false, false); expectedOffset += allocOffset; EXPECT_EQ(importPointer, hostAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr); @@ -980,7 +980,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingP auto hostAllocation = hostDriverHandle->findHostPointerAllocation(offsetPointer, pointerSize, device->getRootDeviceIndex()); ASSERT_NE(nullptr, hostAllocation); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, offsetPointer, pointerSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, offsetPointer, pointerSize, false, false); auto expectedAlignedAddress = static_cast(hostAllocation->getGpuAddress()); EXPECT_EQ(heapPointer, hostAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index 3566a387c5..873242ebd4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -328,6 +328,40 @@ HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopy commandList->cmdQImmediate = nullptr; } +HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopySharedSystemUsmThenSuccessIsReturned) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1); + debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(1); + + ze_command_queue_desc_t queueDesc = {}; + auto queue = std::make_unique>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto commandList = std::make_unique>>(); + ASSERT_NE(nullptr, commandList); + commandList->device = device; + commandList->cmdQImmediate = queue.get(); + commandList->cmdListType = CommandList::CommandListType::typeImmediate; + ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, ret); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + + sharedSystemMemCapabilities = 0xf; + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, copyParams); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, queue->executeCommandListsCalled); + EXPECT_EQ(1u, queue->synchronizeCalled); + + commandList->cmdQImmediate = nullptr; +} + HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned) { ze_command_queue_desc_t queueDesc = {}; auto queue = std::make_unique>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc); @@ -369,6 +403,37 @@ HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemor commandList->getCsr(false)->getInternalAllocationStorage()->getTemporaryAllocations().freeAllGraphicsAllocations(device->getNEODevice()); } +HWTEST2_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineAndSharedSystemUsmThenSuccessIsReturned, IsNotXeHpgCore) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1); + debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(-1); + + ze_command_queue_desc_t queueDesc = {}; + auto queue = std::make_unique>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto commandList = std::make_unique>>(); + ASSERT_NE(nullptr, commandList); + commandList->device = device; + commandList->cmdQImmediate = queue.get(); + commandList->cmdListType = CommandList::CommandListType::typeImmediate; + ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::copy, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, ret); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + + sharedSystemMemCapabilities = 0xf; + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, copyParams); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, queue->executeCommandListsCalled); + EXPECT_EQ(0u, queue->synchronizeCalled); +} + HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenProgramCmdStreamWithFlushTask) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index 99ff64dcac..0cc6eefd5c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -34,7 +34,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily cmdList; + cmdList.initialize(device, NEO::EngineGroupType::copy, 0u); + DebugManagerStateRestore restorer; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + + sharedSystemMemCapabilities = 0xf; // enables return true for Device::areSharedSystemAllocationsAllowed() + + uint8_t pattern = 1; + size_t size = 0x1000; + void *ptr = malloc(size); // reinterpret_cast(0x1234); + auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), size, nullptr, 0, nullptr, copyParams); + EXPECT_EQ(ret, ZE_RESULT_SUCCESS); + free(ptr); +} + using MemFillPlatforms = IsGen12LP; HWTEST2_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillThenCopyBltIsProgrammed, MemFillPlatforms) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index 3c1411b2ff..5b98cbc644 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -108,6 +108,91 @@ HWTEST_F(AppendFillTest, givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOn delete[] ptr; } +HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndMemAdviseThenReturnSuccess) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1); + debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(1); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + sharedSystemMemCapabilities = 0xf; + + int pattern = 0; + const size_t size = 17; + uint8_t *ptr = new uint8_t[size]; + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + delete[] ptr; +} + +HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndNoMemAdviseThenReturnSuccess) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + sharedSystemMemCapabilities = 0xf; + + int pattern = 0; + const size_t size = 17; + uint8_t *ptr = new uint8_t[size]; + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + delete[] ptr; +} + +HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndTreatNonUsmForTransfersAsSharedSystemNotSetReturnSuccessLegacyMode) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(-1); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + sharedSystemMemCapabilities = 0xf; + + int pattern = 0; + const size_t size = 17; + uint8_t *ptr = new uint8_t[size]; + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + delete[] ptr; +} + +HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndNoDebugFlagsSetReturnError) { + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(-1); + debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(-1); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + + auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + VariableBackup sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities}; + sharedSystemMemCapabilities = 0xf; + + int pattern = 0; + const size_t size = 17; + uint8_t *ptr = new uint8_t[size]; + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); + delete[] ptr; +} + HWTEST_F(AppendFillTest, givenAppendMemoryFillWithUnalignedSizeWhenPatternSizeIsOneThenDispatchTwoKernels) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::compute, 0u); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp index d06133a130..e5eb3b71ac 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp @@ -80,7 +80,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily>() {} MockCommandListExtensionHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily>(), failOnFirstCopy(failOnFirst) {} - AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { + AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override { getAlignedAllocationCalledTimes++; if (buffer) { return {0, 0, &alignedAlloc, true}; @@ -701,7 +701,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithNoScopeAndSystemMem void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); size_t expectedOffset = ptrDiff(startMemory, baseAddress); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false); ASSERT_NE(nullptr, outData.alloc); auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); @@ -903,7 +903,7 @@ HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeCont void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); size_t expectedOffset = ptrDiff(startMemory, baseAddress); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false); ASSERT_NE(nullptr, outData.alloc); auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index bc078a77d4..b11ad62e8b 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -4199,7 +4199,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false); EXPECT_EQ(nullptr, outData.alloc); result = context->freeMem(ptr); @@ -4223,7 +4223,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); @@ -4249,7 +4249,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenSharedAllocationPassedToGetAllig auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); @@ -4273,7 +4273,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::renderCompute, 0u); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, ptr, size, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, false, ptr, size, false, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); @@ -4299,7 +4299,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenSharedAllocationPassedToGetAllig auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, ptr, size, false, false); + AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, false, ptr, size, false, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index b682571fd9..47395e06e3 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -432,6 +432,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForcePostSyncL1Flush, -1, "-1: default (do nothi DECLARE_DEBUG_VARIABLE(int32_t, AllowNotZeroForCompressedOnWddm, -1, "-1: default (do nothing), 0: do not set AllowNotZeroed for compressed resources, 1: set AllowNotZeroed for compressed resources"); DECLARE_DEBUG_VARIABLE(int32_t, ForceWddmHugeChunkSizeMB, -1, "-1: default (do nothing), >0: set given huge chunk size in MegaBytes for WDDM"); DECLARE_DEBUG_VARIABLE(int64_t, ForceGmmSystemMemoryBufferForAllocations, 0, "0: default, >0: (bitmask) for given Allocation Types, force GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER gmm resource type"); +DECLARE_DEBUG_VARIABLE(int32_t, EmitMemAdvisePriorToCopyForNonUsm, -1, "Enable Memadvise to system memory for copy/fill with shared system input: -1: default, 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, TreatNonUsmForTransfersAsSharedSystem, -1, "-1: default, 0: import non-usm as external host ptr on copy/fill (legacy mode), 1: treat non usm on copy/fill as shared system usm") /*DIRECT SUBMISSION FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD") diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index fa7ec440f1..bdcb98c6dd 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -319,7 +319,7 @@ BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryFill(const B uint64_t offset = blitProperties.dstOffset.x; while (sizeToFill != 0) { auto tmpCmd = blitCmd; - tmpCmd.setDestinationBaseAddress(ptrOffset(blitProperties.dstAllocation->getGpuAddress(), static_cast(offset))); + tmpCmd.setDestinationBaseAddress(ptrOffset(blitProperties.dstGpuAddress, static_cast(offset))); uint64_t height = 0; uint64_t width = 0; if (sizeToFill <= maxWidth) { @@ -338,7 +338,9 @@ BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryFill(const B tmpCmd.setDestinationY2CoordinateBottom(static_cast(height)); tmpCmd.setDestinationPitch(static_cast(width * patternSize)); - appendBlitMemoryOptionsForFillBuffer(blitProperties.dstAllocation, tmpCmd, rootDeviceEnvironment); + if (blitProperties.dstAllocation) { + appendBlitMemoryOptionsForFillBuffer(blitProperties.dstAllocation, tmpCmd, rootDeviceEnvironment); + } appendBlitFillCommand(blitProperties, tmpCmd); if (useAdditionalBlitProperties && (firstCommand || lastCommand)) { diff --git a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl index 3e9d930572..83b7837529 100644 --- a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl @@ -47,13 +47,15 @@ BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(con blitCmd.setDestinationMOCS(mocs); uint32_t compressionFormat = 0; - if (blitProperties.dstAllocation->isCompressionEnabled()) { - auto resourceFormat = blitProperties.dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - compressionFormat = static_cast(rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat)); + + if (blitProperties.dstAllocation) { + if (blitProperties.dstAllocation->isCompressionEnabled()) { + auto resourceFormat = blitProperties.dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + compressionFormat = static_cast(rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat)); + } + + appendBlitMemSetCompressionFormat(&blitCmd, blitProperties.dstAllocation, compressionFormat); } - - appendBlitMemSetCompressionFormat(&blitCmd, blitProperties.dstAllocation, compressionFormat); - blitCmd.setFillData(*blitProperties.fillPattern); const bool useAdditionalBlitProperties = rootDeviceEnvironment.getHelper().useAdditionalBlitProperties(); @@ -64,7 +66,7 @@ BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(con bool firstCommand = true; while (sizeToFill != 0) { auto tmpCmd = blitCmd; - tmpCmd.setDestinationStartAddress(ptrOffset(blitProperties.dstAllocation->getGpuAddress(), static_cast(offset))); + tmpCmd.setDestinationStartAddress(ptrOffset(blitProperties.dstGpuAddress, static_cast(offset))); uint64_t height = 0; uint64_t width = 0; if (sizeToFill <= maxBlitSetWidth) { diff --git a/shared/source/helpers/blit_properties.cpp b/shared/source/helpers/blit_properties.cpp index 521ca077aa..a61a766cd4 100644 --- a/shared/source/helpers/blit_properties.cpp +++ b/shared/source/helpers/blit_properties.cpp @@ -20,6 +20,7 @@ BlitProperties BlitProperties::constructPropertiesForMemoryFill(GraphicsAllocati .blitDirection = BlitterConstants::BlitDirection::fill, .dstAllocation = dstAllocation, .fillPattern = pattern, + .dstGpuAddress = dstAllocation->getGpuAddress(), .copySize = {size, 1, 1}, .dstOffset = {offset, 0, 0}, .srcOffset = {0, 0, 0}, @@ -27,6 +28,19 @@ BlitProperties BlitProperties::constructPropertiesForMemoryFill(GraphicsAllocati .isSystemMemoryPoolUsed = MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())}; } +BlitProperties BlitProperties::constructPropertiesForSystemMemoryFill(uint64_t dstPtr, size_t size, uint32_t *pattern, size_t patternSize, size_t offset) { + return { + .blitDirection = BlitterConstants::BlitDirection::fill, + .dstAllocation = nullptr, + .fillPattern = pattern, + .dstGpuAddress = dstPtr, + .copySize = {size, 1, 1}, + .dstOffset = {offset, 0, 0}, + .srcOffset = {0, 0, 0}, + .fillPatternSize = patternSize, + .isSystemMemoryPoolUsed = true}; +} + BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation *memObjAllocation, @@ -129,6 +143,54 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds .isSystemMemoryPoolUsed = MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool())}; } +BlitProperties BlitProperties::constructPropertiesForSystemCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation, uint64_t dstPtr, uint64_t srcPtr, + const Vec3 &dstOffset, const Vec3 &srcOffset, Vec3 copySize, + size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation) { + copySize.y = copySize.y ? copySize.y : 1; + copySize.z = copySize.z ? copySize.z : 1; + uint64_t dst; + uint64_t src; + if (dstAllocation) { + dst = dstAllocation->getGpuAddress(); + } else { + dst = dstPtr; + } + + if (srcAllocation) { + src = srcAllocation->getGpuAddress(); + } else { + src = srcPtr; + } + + bool sysMem; + if ((srcAllocation) && (dstAllocation)) { + sysMem = MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool()); + } else { + sysMem = true; + } + + return { + .blitSyncProperties = {}, + .csrDependencies = {}, + .multiRootDeviceEventSync = nullptr, + .blitDirection = BlitterConstants::BlitDirection::bufferToBuffer, + .auxTranslationDirection = AuxTranslationDirection::none, + .dstAllocation = dstAllocation, + .srcAllocation = srcAllocation, + .clearColorAllocation = clearColorAllocation, + .dstGpuAddress = dst, + .srcGpuAddress = src, + .copySize = copySize, + .dstOffset = dstOffset, + .srcOffset = srcOffset, + .dstRowPitch = dstRowPitch, + .dstSlicePitch = dstSlicePitch, + .srcRowPitch = srcRowPitch, + .srcSlicePitch = srcSlicePitch, + .isSystemMemoryPoolUsed = sysMem}; +} + BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection, GraphicsAllocation *allocation, GraphicsAllocation *clearColorAllocation) { diff --git a/shared/source/helpers/blit_properties.h b/shared/source/helpers/blit_properties.h index 669415cee6..9f935080be 100644 --- a/shared/source/helpers/blit_properties.h +++ b/shared/source/helpers/blit_properties.h @@ -47,6 +47,9 @@ struct BlitSyncProperties { struct BlitProperties { static BlitProperties constructPropertiesForMemoryFill(GraphicsAllocation *dstAllocation, size_t size, uint32_t *pattern, size_t patternSize, size_t offset); + + static BlitProperties constructPropertiesForSystemMemoryFill(uint64_t dstPtr, size_t size, uint32_t *pattern, size_t patternSize, size_t offset); + static BlitProperties constructPropertiesForReadWrite(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation *memObjAllocation, @@ -62,6 +65,11 @@ struct BlitProperties { size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation); + static BlitProperties constructPropertiesForSystemCopy(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation, uint64_t dstPtr, uint64_t srcPtr, + const Vec3 &dstOffset, const Vec3 &srcOffset, Vec3 copySize, + size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation); + static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection, GraphicsAllocation *allocation, GraphicsAllocation *clearColorAllocation); diff --git a/shared/source/xe2_hpg_core/command_stream_receiver_hw_xe2_hpg_core.cpp b/shared/source/xe2_hpg_core/command_stream_receiver_hw_xe2_hpg_core.cpp index de3c7457a4..96ce71662b 100644 --- a/shared/source/xe2_hpg_core/command_stream_receiver_hw_xe2_hpg_core.cpp +++ b/shared/source/xe2_hpg_core/command_stream_receiver_hw_xe2_hpg_core.cpp @@ -214,16 +214,34 @@ void BlitCommandsHelper::appendBlitCommandsMemCopy(const BlitProperties uint8_t compressionFormat = 0; - if (dstAllocation->isCompressionEnabled()) { - auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); - } else if (srcAllocation->isCompressionEnabled()) { - auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + if (dstAllocation) { + if (dstAllocation->isCompressionEnabled()) { + auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + } + } + if (compressionFormat == 0) { + if (srcAllocation) { + if (srcAllocation->isCompressionEnabled()) { + auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + } + } } if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { - if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool()) || !MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + bool enable = false; + if (srcAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool())) { + enable = true; + } + } + if (dstAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + enable = true; + } + } + if (enable) { compressionFormat = static_cast(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); } } diff --git a/shared/source/xe3_core/command_stream_receiver_hw_xe3_core.cpp b/shared/source/xe3_core/command_stream_receiver_hw_xe3_core.cpp index 756178a305..7eb904f86f 100644 --- a/shared/source/xe3_core/command_stream_receiver_hw_xe3_core.cpp +++ b/shared/source/xe3_core/command_stream_receiver_hw_xe3_core.cpp @@ -168,16 +168,34 @@ void BlitCommandsHelper::appendBlitCommandsMemCopy(const BlitProperties uint8_t compressionFormat = 0; - if (dstAllocation->isCompressionEnabled()) { - auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); - } else if (srcAllocation->isCompressionEnabled()) { - auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + if (dstAllocation) { + if (dstAllocation->isCompressionEnabled()) { + auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + } + } + if (compressionFormat == 0) { + if (srcAllocation) { + if (srcAllocation->isCompressionEnabled()) { + auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + } + } } if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { - if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool()) || !MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + bool enable = false; + if (srcAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool())) { + enable = true; + } + } + if (dstAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + enable = true; + } + } + if (enable) { compressionFormat = static_cast(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); } } diff --git a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp index c14ab56de4..3819b1f5f5 100644 --- a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp @@ -102,27 +102,35 @@ void BlitCommandsHelper::appendBlitCommandsMemCopy(const BlitProperties blitCmd.setDestinationMOCS(mocs); blitCmd.setSourceMOCS(mocs); - if (dstAllocation->isCompressionEnabled()) { - auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); - blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); - blitCmd.setCompressionFormat(compressionFormat); + if (dstAllocation) { + if (dstAllocation->isCompressionEnabled()) { + auto resourceFormat = dstAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); + blitCmd.setCompressionFormat(compressionFormat); + } } - if (srcAllocation->isCompressionEnabled()) { - auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); - auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); - blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); - blitCmd.setCompressionFormat(compressionFormat); + if (srcAllocation) { + if (srcAllocation->isCompressionEnabled()) { + auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); + auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); + blitCmd.setCompressionFormat(compressionFormat); + } } if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { - if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool())) { - blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); - blitCmd.setCompressionFormat(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); + if (srcAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(srcAllocation->getMemoryPool())) { + blitCmd.setSourceCompressible(MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); + blitCmd.setCompressionFormat(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); + } } - if (!MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { - blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); - blitCmd.setCompressionFormat(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); + if (dstAllocation) { + if (!MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + blitCmd.setDestinationCompressible(MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); + blitCmd.setCompressionFormat(debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); + } } } diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index a846d5d609..1933f518c0 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -655,6 +655,8 @@ PipelinedEuThreadArbitration = -1 ExperimentalUSMAllocationReuseCleaner = -1 DummyPageBackingEnabled = 0 EnableDeferBacking = 0 +EmitMemAdvisePriorToCopyForNonUsm = -1 +TreatNonUsmForTransfersAsSharedSystem = -1 SetMaxBVHLevels = -1 GetSipBinaryFromExternalLib = -1 LogUsmReuse = 0 diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index 3aedc8b79d..497dc5b96d 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -72,6 +72,126 @@ TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForReadW blitProperties.dstAllocation->hostPtrTaskCountAssignment--; } +TEST(BlitCommandsHelperTest, GivenTwoGraphicAllocationsConstructPropertiesForSystemCopyCreatedCorrectly) { + uint32_t src[] = {1, 2, 3, 4}; + uint32_t dst[] = {4, 3, 2, 1}; + uint32_t clear[] = {5, 6, 7, 8}; + uint64_t srcGpuAddr = 0x12345; + uint64_t dstGpuAddr = 0x54321; + uint64_t clearGpuAddr = 0x5678; + std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); + std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); + std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); + + Vec3 srcOffsets{1, 2, 3}; + Vec3 dstOffsets{3, 2, 1}; + Vec3 copySize{2, 2, 2}; + + size_t srcRowPitch = 2; + size_t srcSlicePitch = 3; + + size_t dstRowPitch = 2; + size_t dstSlicePitch = 3; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForSystemCopy(dstAlloc.get(), srcAlloc.get(), dstGpuAddr, srcGpuAddr, + dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, clearColorAllocation.get()); + + EXPECT_EQ(blitProperties.blitDirection, BlitterConstants::BlitDirection::bufferToBuffer); + EXPECT_EQ(blitProperties.dstAllocation, dstAlloc.get()); + EXPECT_EQ(blitProperties.srcAllocation, srcAlloc.get()); + EXPECT_EQ(blitProperties.clearColorAllocation, clearColorAllocation.get()); + EXPECT_EQ(blitProperties.dstGpuAddress, dstGpuAddr); + EXPECT_EQ(blitProperties.srcGpuAddress, srcGpuAddr); + EXPECT_EQ(blitProperties.copySize, copySize); + EXPECT_EQ(blitProperties.dstOffset, dstOffsets); + EXPECT_EQ(blitProperties.srcOffset, srcOffsets); + EXPECT_EQ(blitProperties.dstRowPitch, dstRowPitch); + EXPECT_EQ(blitProperties.dstSlicePitch, dstSlicePitch); + EXPECT_EQ(blitProperties.srcRowPitch, srcRowPitch); + EXPECT_EQ(blitProperties.srcSlicePitch, srcSlicePitch); + EXPECT_FALSE(blitProperties.isSystemMemoryPoolUsed); +} + +TEST(BlitCommandsHelperTest, GivenSourceGraphicAllocationConstructPropertiesForSystemCopyCreatedCorrectly) { + uint32_t src[] = {1, 2, 3, 4}; + uint32_t clear[] = {5, 6, 7, 8}; + uint64_t srcGpuAddr = 0x12345; + uint64_t dstGpuAddr = 0x54321; + uint64_t clearGpuAddr = 0x5678; + std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); + GraphicsAllocation *dstAlloc = nullptr; + std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); + + Vec3 srcOffsets{1, 2, 3}; + Vec3 dstOffsets{0, 0, 0}; + Vec3 copySize{2, 0, 0}; + + size_t srcRowPitch = 0; + size_t srcSlicePitch = 0; + + size_t dstRowPitch = 0; + size_t dstSlicePitch = 0; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForSystemCopy(dstAlloc, srcAlloc.get(), dstGpuAddr, srcGpuAddr, + dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, clearColorAllocation.get()); + + EXPECT_EQ(blitProperties.blitDirection, BlitterConstants::BlitDirection::bufferToBuffer); + EXPECT_EQ(blitProperties.dstAllocation, nullptr); + EXPECT_EQ(blitProperties.srcAllocation, srcAlloc.get()); + EXPECT_EQ(blitProperties.clearColorAllocation, clearColorAllocation.get()); + EXPECT_EQ(blitProperties.dstGpuAddress, dstGpuAddr); + EXPECT_EQ(blitProperties.srcGpuAddress, srcGpuAddr); + EXPECT_EQ(blitProperties.dstOffset, dstOffsets); + EXPECT_EQ(blitProperties.srcOffset, srcOffsets); + EXPECT_EQ(blitProperties.dstRowPitch, dstRowPitch); + EXPECT_EQ(blitProperties.dstSlicePitch, dstSlicePitch); + EXPECT_EQ(blitProperties.srcRowPitch, srcRowPitch); + EXPECT_EQ(blitProperties.srcSlicePitch, srcSlicePitch); + EXPECT_TRUE(blitProperties.isSystemMemoryPoolUsed); +} + +TEST(BlitCommandsHelperTest, GivenDestinationGraphicAllocationConstructPropertiesForSystemCopyCreatedCorrectly) { + uint32_t dst[] = {1, 2, 3, 4}; + uint32_t clear[] = {5, 6, 7, 8}; + uint64_t srcGpuAddr = 0x12345; + uint64_t dstGpuAddr = 0x54321; + uint64_t clearGpuAddr = 0x5678; + GraphicsAllocation *srcAlloc = nullptr; + std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); + std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); + + Vec3 srcOffsets{0, 0, 0}; + Vec3 dstOffsets{3, 2, 1}; + Vec3 copySize{2, 2, 2}; + + size_t srcRowPitch = 2; + size_t srcSlicePitch = 3; + + size_t dstRowPitch = 2; + size_t dstSlicePitch = 3; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForSystemCopy(dstAlloc.get(), srcAlloc, dstGpuAddr, srcGpuAddr, + dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, clearColorAllocation.get()); + + EXPECT_EQ(blitProperties.blitDirection, BlitterConstants::BlitDirection::bufferToBuffer); + EXPECT_EQ(blitProperties.dstAllocation, dstAlloc.get()); + EXPECT_EQ(blitProperties.srcAllocation, nullptr); + EXPECT_EQ(blitProperties.clearColorAllocation, clearColorAllocation.get()); + EXPECT_EQ(blitProperties.dstGpuAddress, dstGpuAddr); + EXPECT_EQ(blitProperties.srcGpuAddress, srcGpuAddr); + EXPECT_EQ(blitProperties.copySize, copySize); + EXPECT_EQ(blitProperties.dstOffset, dstOffsets); + EXPECT_EQ(blitProperties.srcOffset, srcOffsets); + EXPECT_EQ(blitProperties.dstRowPitch, dstRowPitch); + EXPECT_EQ(blitProperties.dstSlicePitch, dstSlicePitch); + EXPECT_EQ(blitProperties.srcRowPitch, srcRowPitch); + EXPECT_EQ(blitProperties.srcSlicePitch, srcSlicePitch); + EXPECT_TRUE(blitProperties.isSystemMemoryPoolUsed); +} + TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBufferRegionsThenPropertiesCreatedCorrectly) { uint32_t src[] = {1, 2, 3, 4}; uint32_t dst[] = {4, 3, 2, 1}; @@ -278,6 +398,18 @@ HWTEST_F(BlitTests, givenMemoryWhenFillPatternWithBlitThenCommandIsProgrammed) { EXPECT_NE(cmdList.end(), itor); } +HWTEST_F(BlitTests, givenConstructPropertiesForSystemMemoryFillCreatedSuccessfully) { + uint32_t pattern[4] = {1, 0, 0, 0}; + uint64_t dstPtr = 0x1234; + size_t size = 0x1000; + + auto blitProperties = BlitProperties::constructPropertiesForSystemMemoryFill(dstPtr, size, pattern, sizeof(uint32_t), 0); + + EXPECT_EQ(blitProperties.dstAllocation, nullptr); + EXPECT_EQ(blitProperties.dstGpuAddress, dstPtr); + EXPECT_EQ(blitProperties.isSystemMemoryPoolUsed, true); +} + HWTEST_F(BlitTests, givenUnalignedPatternSizeWhenDispatchingBlitFillThenSetCorrectColorDepth) { using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; uint32_t pattern[4] = {1, 0, 0, 0}; @@ -419,6 +551,24 @@ HWTEST_F(BlitTests, givenXyCopyBltCommandWhenAppendBlitCommandsMemCopyIsCalledTh EXPECT_EQ(memcmp(&bltCmd, &bltCmdBefore, sizeof(XY_COPY_BLT)), 0); } +HWTEST2_F(BlitTests, givenXe2HpgCoreWhenAppendBlitCommandsMemCopyIsCalledThenNothingChanged, IsXe2HpgCore) { + auto bltCmd = FamilyType::cmdInitXyCopyBlt; + BlitProperties properties = {}; + properties.dstAllocation = nullptr; + properties.srcAllocation = nullptr; + NEO::BlitCommandsHelper::appendBlitCommandsMemCopy(properties, bltCmd, pDevice->getRootDeviceEnvironment()); + EXPECT_EQ(bltCmd.getCompressionFormat(), 0); +} + +HWTEST2_F(BlitTests, givenXe3CoreWhenAppendBlitCommandsMemCopyIsCalledThenNothingChanged, IsXe3Core) { + auto bltCmd = FamilyType::cmdInitXyCopyBlt; + BlitProperties properties = {}; + properties.dstAllocation = nullptr; + properties.srcAllocation = nullptr; + NEO::BlitCommandsHelper::appendBlitCommandsMemCopy(properties, bltCmd, pDevice->getRootDeviceEnvironment()); + EXPECT_EQ(bltCmd.getCompressionFormat(), 0); +} + HWTEST_F(BlitTests, givenXyBlockCopyBltCommandAndSliceIndex0WhenAppendBaseAddressOffsetIsCalledThenNothingChanged) { using XY_BLOCK_COPY_BLT = typename FamilyType::XY_BLOCK_COPY_BLT; auto bltCmd = FamilyType::cmdInitXyBlockCopyBlt; @@ -958,6 +1108,45 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio EXPECT_EQ(0, memcmp(ptrOffset(stream.getCpuBase(), 0), ptrOffset(stream3.getCpuBase(), 0), std::min(stream.getUsed(), stream3.getUsed()))); } +HWTEST2_F(BlitTests, givenSystemMemoryPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditionalPropertiesWhenCallingDispatchBlitMemoryFillThenTheResultsAreTheSame, MatchAny) { + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), true)); + size_t dstSize = 2 * sizeof(uint32_t) * (maxBlitWidth * maxBlitHeight) + sizeof(uint32_t); + void *dstPtr = malloc(dstSize); + + uint32_t pattern[4] = {}; + pattern[0] = 0x4567; + auto blitProperties = BlitProperties::constructPropertiesForSystemMemoryFill(reinterpret_cast(dstPtr), dstSize, pattern, sizeof(uint32_t), 0); + ASSERT_TRUE(blitProperties.isSystemMemoryPoolUsed); + + auto nBlitsColorFill = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + auto nBlitsFill = NEO::BlitCommandsHelper::getNumberOfBlitsForFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + EXPECT_EQ(3u, nBlitsColorFill); + EXPECT_EQ(nBlitsFill, nBlitsColorFill); + + uint32_t streamBuffer[1200] = {}; + LinearStream stream(streamBuffer, sizeof(streamBuffer)); + NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); + + uint32_t streamBuffer2[1200] = {}; + LinearStream stream2(streamBuffer2, sizeof(streamBuffer2)); + auto blitResult2 = NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, stream2, pDevice->getRootDeviceEnvironmentRef()); + EXPECT_NE(nullptr, blitResult2.lastBlitCommand); + + // change productHelper to return true + pDevice->getRootDeviceEnvironmentRef().productHelper.reset(new MockProductHelperHw); + auto *mockProductHelper = static_cast *>(pDevice->getRootDeviceEnvironmentRef().productHelper.get()); + mockProductHelper->enableAdditionalBlitProperties = true; + + uint32_t streamBuffer3[1300] = {}; + LinearStream stream3(streamBuffer3, sizeof(streamBuffer3)); + NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, stream3, pDevice->getRootDeviceEnvironmentRef()); + + EXPECT_EQ(stream.getUsed(), stream3.getUsed()); + EXPECT_EQ(0, memcmp(ptrOffset(stream.getCpuBase(), 0), ptrOffset(stream3.getCpuBase(), 0), std::min(stream.getUsed(), stream3.getUsed()))); + free(dstPtr); +} + HWTEST_F(BlitTests, givenBlitPropertieswithImageOperationWhenCallingEstimateBlitCommandSizeThenBlockCopySizeIsReturned) { size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); Vec3 copySize{maxBlitWidth - 1, 1, 1}; diff --git a/shared/test/unit_test/helpers/test_blit_commands_helper_pvc_and_later.cpp b/shared/test/unit_test/helpers/test_blit_commands_helper_pvc_and_later.cpp index 164fc2fef9..35ce273885 100644 --- a/shared/test/unit_test/helpers/test_blit_commands_helper_pvc_and_later.cpp +++ b/shared/test/unit_test/helpers/test_blit_commands_helper_pvc_and_later.cpp @@ -36,6 +36,24 @@ HWTEST2_F(BlitTests, givenOneBytePatternWhenFillPatternWithBlitThenCommandIsProg EXPECT_NE(cmdList.end(), itor); } +HWTEST2_F(BlitTests, givenOneBytePatternWhenFillPatternWithSystemMemoryBlitThenCommandIsProgrammed, IsPVC) { + using MEM_SET = typename FamilyType::MEM_SET; + uint32_t pattern = 1; + void *dstPtr = malloc(4); + uint32_t streamBuffer[100] = {}; + LinearStream stream(streamBuffer, sizeof(streamBuffer)); + + auto blitProperties = BlitProperties::constructPropertiesForSystemMemoryFill(reinterpret_cast(dstPtr), sizeof(uint32_t), &pattern, sizeof(uint8_t), 0); + + BlitCommandsHelper::dispatchBlitMemoryColorFill(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, ptrOffset(stream.getCpuBase(), 0), stream.getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + free(dstPtr); +} + HWTEST2_F(BlitTests, givenDeviceWithoutDefaultGmmWhenAppendBlitCommandsForVillBufferThenDstCompressionDisabled, IsPVC) { using MEM_SET = typename FamilyType::MEM_SET; uint32_t pattern = 1;