feature: Implement appendMemoryCopy/Fill for Shared System USM

Related-To: NEO-13697

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2025-06-06 22:53:08 +00:00
committed by Compute-Runtime-Automation
parent 994433d941
commit 805a716fe3
23 changed files with 700 additions and 125 deletions

View File

@@ -343,7 +343,7 @@ struct CommandListCoreFamily : public CommandListImp {
void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds);
size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint32_t bufferRowPitch, uint32_t bufferSlicePitch, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
size_t getAllocationOffsetForAppendBlitFill(void *ptr, NEO::GraphicsAllocation &gpuAllocation);
uint32_t getRegionOffsetForAppendMemoryCopyBlitRegion(AlignedAllocationData *allocationData);
void handlePostSubmissionState();

View File

@@ -838,7 +838,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, srcRowPitch, srcSlicePitch, pDstRegion);
auto allocationStruct = getAlignedAllocationData(this->device, srcPtr, bufferSize, true, false);
auto allocationStruct = getAlignedAllocationData(this->device, false, srcPtr, bufferSize, true, false);
if (allocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -1036,7 +1036,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, destRowPitch, destSlicePitch, pSrcRegion);
auto allocationStruct = getAlignedAllocationData(this->device, dstPtr, bufferSize, false, false);
auto allocationStruct = getAlignedAllocationData(this->device, false, dstPtr, bufferSize, false, false);
if (allocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -1463,6 +1463,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeMemAdvise(ze_device_han
return ZE_RESULT_SUCCESS;
}
// Binds one pointer argument of a copy builtin kernel.
//
// builtinKernel - kernel whose argument is being set
// argIndex      - index of the kernel argument
// argPtr        - address of a uintptr_t that holds the buffer address; it is
//                 dereferenced on the allocation path and passed as the value
//                 source on the scalar path
// allocation    - graphics allocation backing the address, or nullptr when the
//                 pointer has no backing allocation
//
// With an allocation the argument is set as a buffer through
// setArgBufferWithAlloc; without one the raw address value is written through
// setArgumentValue.
static inline void builtinSetArgCopy(Kernel *builtinKernel, uint32_t argIndex, void *argPtr, NEO::GraphicsAllocation *allocation) {
    if (allocation) {
        builtinKernel->setArgBufferWithAlloc(argIndex, *reinterpret_cast<uintptr_t *>(argPtr), allocation, nullptr);
    } else {
        // argPtr points at a uintptr_t value, so the argument is sized by that
        // value type (matching builtinSetArgFill) rather than by a pointer type.
        builtinKernel->setArgumentValue(argIndex, sizeof(uintptr_t), argPtr);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1495,8 +1503,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
return ret;
}
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc, nullptr);
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc, nullptr);
builtinSetArgCopy(builtinKernel, 0, dstPtr, dstPtrAlloc);
builtinSetArgCopy(builtinKernel, 1, srcPtr, srcPtrAlloc);
uint64_t elems = size / elementSize;
builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
@@ -1506,12 +1514,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
auto dstAllocationType = dstPtrAlloc->getAllocationType();
launchParams.isBuiltInKernel = true;
launchParams.isDestinationAllocationInSystemMemory = this->isUsingSystemAllocation(dstAllocationType);
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstPtrAlloc, device->getDriverHandle()->getSvmAllocsManager());
if (dstPtrAlloc) {
auto dstAllocationType = dstPtrAlloc->getAllocationType();
launchParams.isDestinationAllocationInSystemMemory = this->isUsingSystemAllocation(dstAllocationType);
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstPtrAlloc, device->getDriverHandle()->getSvmAllocsManager());
}
} else {
launchParams.isDestinationAllocationInSystemMemory = true;
}
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinKernel, dispatchKernelArgs, signalEvent, launchParams);
}
@@ -1529,16 +1540,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
uint64_t srcOffset,
uint64_t size,
Event *signalEvent) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
srcOffset += ptrDiff<uintptr_t>(srcPtr, srcPtrAlloc->getGpuAddress());
if (dstPtrAlloc) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
}
if (srcPtrAlloc) {
srcOffset += ptrDiff<uintptr_t>(srcPtr, srcPtrAlloc->getGpuAddress());
}
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation);
auto blitProperties = NEO::BlitProperties::constructPropertiesForSystemCopy(dstPtrAlloc, srcPtrAlloc, dstPtr, srcPtr, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation);
blitProperties.computeStreamPartitionCount = this->partitionCount;
blitProperties.highPriority = isHighPriorityImmediateCmdList();
commandContainer.addToResidencyContainer(dstPtrAlloc);
commandContainer.addToResidencyContainer(srcPtrAlloc);
if (dstPtrAlloc) {
commandContainer.addToResidencyContainer(dstPtrAlloc);
}
if (srcPtrAlloc) {
commandContainer.addToResidencyContainer(srcPtrAlloc);
}
commandContainer.addToResidencyContainer(clearColorAllocation);
size_t nBlitsPerRow = NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(blitProperties.copySize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed);
@@ -1762,6 +1780,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
CmdListMemoryCopyParams &memoryCopyParams) {
NEO::Device *neoDevice = device->getNEODevice();
bool sharedSystemEnabled = ((neoDevice->areSharedSystemAllocationsAllowed()) && (NEO::debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.get() == 1));
uint32_t callId = 0;
if (NEO::debugManager.flags.EnableSWTags.get()) {
callId = neoDevice->getRootDeviceEnvironment().tagsManager->incrementAndGetCurrentCallCount();
@@ -1772,14 +1792,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
callId);
}
auto dstAllocationStruct = getAlignedAllocationData(this->device, dstptr, size, false, isCopyOffloadEnabled());
auto srcAllocationStruct = getAlignedAllocationData(this->device, srcptr, size, true, isCopyOffloadEnabled());
auto dstAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, dstptr, size, false, isCopyOffloadEnabled());
auto srcAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, srcptr, size, true, isCopyOffloadEnabled());
if (dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) {
if ((dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) && (sharedSystemEnabled == false)) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc);
if ((dstAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) {
appendMemAdvise(device, reinterpret_cast<void *>(dstAllocationStruct.alignedAllocationPtr), size, static_cast<ze_memory_advice_t>(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION));
}
if ((srcAllocationStruct.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) {
appendMemAdvise(device, reinterpret_cast<void *>(srcAllocationStruct.alignedAllocationPtr), size, static_cast<ze_memory_advice_t>(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION));
}
if (dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) {
memoryCopyParams.copyOffloadAllowed = true;
} else {
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc);
}
const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed);
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
@@ -1997,8 +2029,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
size_t dstSize = this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch);
size_t srcSize = this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch);
auto dstAllocationStruct = getAlignedAllocationData(this->device, dstPtr, dstSize, false, isCopyOffloadEnabled());
auto srcAllocationStruct = getAlignedAllocationData(this->device, srcPtr, srcSize, true, isCopyOffloadEnabled());
auto dstAllocationStruct = getAlignedAllocationData(this->device, false, dstPtr, dstSize, false, isCopyOffloadEnabled());
auto srcAllocationStruct = getAlignedAllocationData(this->device, false, srcPtr, srcSize, true, isCopyOffloadEnabled());
UNRECOVERABLE_IF(srcSlicePitch && srcPitch == 0);
Vec3<size_t> srcSize3 = {srcPitch ? srcPitch : srcRegion->width + srcRegion->originX,
@@ -2251,6 +2283,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
return ZE_RESULT_SUCCESS;
}
// Binds the destination pointer argument of a fill builtin kernel.
//
// When no graphics allocation is supplied, the address is written as a plain
// value argument; otherwise it is set as a buffer argument tied to the
// allocation.
static inline void builtinSetArgFill(Kernel *builtinKernel, uint32_t argIndex, uintptr_t argPtr, NEO::GraphicsAllocation *allocation) {
    if (allocation == nullptr) {
        // No backing allocation: hand the raw address to the kernel by value.
        builtinKernel->setArgumentValue(argIndex, sizeof(uintptr_t), &argPtr);
        return;
    }
    builtinKernel->setArgBufferWithAlloc(argIndex, argPtr, allocation, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, CmdListKernelLaunchParams &launchParams) {
@@ -2264,7 +2304,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
@@ -2292,6 +2332,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
memoryCopyParams.copyOffloadAllowed = isCopyOffloadEnabled();
NEO::Device *neoDevice = device->getNEODevice();
bool sharedSystemEnabled = ((neoDevice->areSharedSystemAllocationsAllowed()) && (NEO::debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.get() == 1));
uint32_t callId = 0;
if (NEO::debugManager.flags.EnableSWTags.get()) {
callId = neoDevice->getRootDeviceEnvironment().tagsManager->incrementAndGetCurrentCallCount();
@@ -2340,18 +2381,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
} else {
if (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS) {
if ((sharedSystemEnabled == false) && (neoDevice->areSharedSystemAllocationsAllowed() == false) && (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS)) {
// By default the first two conditions above hold; sharedSystemEnabled and areSharedSystemAllocationsAllowed() can each become true only via debug variables
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} else {
hostPointerNeedsFlush = true;
}
hostPointerNeedsFlush = true;
}
auto dstAllocation = this->getAlignedAllocationData(this->device, ptr, size, false, false);
if (dstAllocation.alloc == nullptr) {
auto dstAllocation = this->getAlignedAllocationData(this->device, sharedSystemEnabled, ptr, size, false, false);
if ((dstAllocation.alloc == nullptr) && (sharedSystemEnabled == false)) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
if ((dstAllocation.alloc == nullptr) && (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1)) {
appendMemAdvise(device, reinterpret_cast<void *>(dstAllocation.alignedAllocationPtr), size, static_cast<ze_memory_advice_t>(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION));
}
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto builtin = (patternSize == 1)
@@ -2362,8 +2407,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
launchParams.isBuiltInKernel = true;
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAllocation.alloc, device->getDriverHandle()->getSvmAllocsManager());
if (dstAllocation.alloc) {
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAllocation.alloc, device->getDriverHandle()->getSvmAllocsManager());
}
}
CmdListFillKernelArguments fillArguments = {};
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);
@@ -2400,7 +2447,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
uint32_t value = 0;
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(fillArguments.mainOffset), &fillArguments.mainOffset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
@@ -2447,7 +2495,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
patternAllocOffset += patternSizeToCopy;
} while (patternAllocOffset < patternAllocationSize);
if (fillArguments.leftRemainingBytes == 0) {
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinSetArgFill(builtinKernel, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc, nullptr);
builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls);
@@ -2468,9 +2516,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinKernelRemainder->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1, 1);
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(fillArguments.groups), 1u, 1u};
builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc, nullptr);
builtinSetArgFill(builtinKernelRemainder, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
@@ -2496,9 +2542,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinKernelRemainder->setGroupSize(fillArguments.rightRemainingBytes, 1u, 1u);
ze_group_count_t dispatchKernelArgs{1u, 1u, 1u};
builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc, nullptr);
builtinSetArgFill(builtinKernelRemainder, 0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
@@ -2545,6 +2589,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
NEO::Device *neoDevice = device->getNEODevice();
bool sharedSystemEnabled = neoDevice->areSharedSystemAllocationsAllowed();
if (this->maxFillPaternSizeForCopyEngine < patternSize) {
return ZE_RESULT_ERROR_INVALID_SIZE;
} else {
@@ -2568,6 +2616,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
size,
neoDevice->getRootDeviceIndex(),
nullptr);
DriverHandleImp *driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) {
@@ -2575,20 +2624,32 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), nullptr, nullptr);
}
if (gpuAllocation == nullptr) {
if ((gpuAllocation == nullptr) && (sharedSystemEnabled == false)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
}
auto offset = getAllocationOffsetForAppendBlitFill(ptr, *gpuAllocation);
commandContainer.addToResidencyContainer(gpuAllocation);
uint32_t patternToCommand[4] = {};
memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize);
NEO::BlitProperties blitProperties;
bool useAdditionalTimestamp = false;
if (gpuAllocation) {
auto offset = getAllocationOffsetForAppendBlitFill(ptr, *gpuAllocation);
commandContainer.addToResidencyContainer(gpuAllocation);
blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset);
size_t nBlits = NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed);
useAdditionalTimestamp = nBlits > 1;
} else if (sharedSystemEnabled == true) {
if (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1) {
appendMemAdvise(device, ptr, size, static_cast<ze_memory_advice_t>(ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION));
}
blitProperties = NEO::BlitProperties::constructPropertiesForSystemMemoryFill(reinterpret_cast<uint64_t>(ptr), size, patternToCommand, patternSize, 0ul);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
auto blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset);
size_t nBlits = NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed);
bool useAdditionalTimestamp = nBlits > 1;
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
}
@@ -2675,7 +2736,7 @@ inline uint64_t CommandListCoreFamily<gfxCoreFamily>::getInputBufferSize(NEO::Im
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload) {
inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocationData(Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload) {
NEO::SvmAllocationData *allocData = nullptr;
void *ptr = const_cast<void *>(buffer);
bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr,
@@ -2697,16 +2758,20 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
// get offset from base of allocation to arg address
offset += reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(alloc->getUnderlyingBuffer());
} else {
alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed, copyOffload);
if (alloc == nullptr) {
return {0u, 0, nullptr, false};
}
alignedPtr = static_cast<uintptr_t>(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment()));
if (alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) {
auto hostAllocCpuPtr = reinterpret_cast<uintptr_t>(alloc->getUnderlyingBuffer());
hostAllocCpuPtr = alignDown(hostAllocCpuPtr, NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment());
auto allignedPtrOffset = sourcePtr - hostAllocCpuPtr;
alignedPtr = ptrOffset(alignedPtr, allignedPtrOffset);
if (sharedSystemEnabled) {
return {reinterpret_cast<uintptr_t>(ptr), 0, nullptr, true};
} else {
alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed, copyOffload);
if (alloc == nullptr) {
return {0u, 0, nullptr, false};
}
alignedPtr = static_cast<uintptr_t>(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment()));
if (alloc->getAllocationType() == NEO::AllocationType::externalHostPtr) {
auto hostAllocCpuPtr = reinterpret_cast<uintptr_t>(alloc->getUnderlyingBuffer());
hostAllocCpuPtr = alignDown(hostAllocCpuPtr, NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment());
auto allignedPtrOffset = sourcePtr - hostAllocCpuPtr;
alignedPtr = ptrOffset(alignedPtr, allignedPtrOffset);
}
}
}
@@ -3366,7 +3431,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false, false);
auto allocationStruct = getAlignedAllocationData(this->device, false, dstptr, sizeof(uint64_t), false, false);
if (allocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -3419,7 +3484,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
const size_t *pOffsets, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
auto dstPtrAllocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents, false, false);
auto dstPtrAllocationStruct = getAlignedAllocationData(this->device, false, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents, false, false);
if (dstPtrAllocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -3465,7 +3530,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::queryKernelTimestamps);
builtinKernel->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestampsValue);
} else {
auto pOffsetAllocationStruct = getAlignedAllocationData(this->device, pOffsets, sizeof(size_t) * numEvents, false, false);
auto pOffsetAllocationStruct = getAlignedAllocationData(this->device, false, pOffsets, sizeof(size_t) * numEvents, false, false);
if (pOffsetAllocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -4082,7 +4147,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
signalEvent = Event::fromHandle(signalEventHandle);
}
auto srcAllocationStruct = getAlignedAllocationData(this->device, ptr, sizeof(uint32_t), true, false);
auto srcAllocationStruct = getAlignedAllocationData(this->device, false, ptr, sizeof(uint32_t), true, false);
if (srcAllocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -4145,7 +4210,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
auto descriptor = reinterpret_cast<zex_write_to_mem_desc_t *>(desc);
size_t bufSize = sizeof(uint64_t);
auto dstAllocationStruct = getAlignedAllocationData(this->device, ptr, bufSize, false, false);
auto dstAllocationStruct = getAlignedAllocationData(this->device, false, ptr, bufSize, false, false);
if (dstAllocationStruct.alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
@@ -4646,7 +4711,15 @@ bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostCopyEventFenceRequired(
// Reports whether a copy-engine operation reads from an allocation in the
// local memory pool and writes to one that is not.
//
// srcAllocation / dstAllocation may be nullptr; a missing allocation is
// treated as not residing in the local memory pool, so the pointers are never
// dereferenced when null.
// Returns true only when copyEngineOperation is set, the source is in the
// local memory pool and the destination is not.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyEngineOperation) const {
    bool srcInLocalPool = false;
    bool dstInLocalPool = false;
    if (srcAllocation) {
        srcInLocalPool = srcAllocation->isAllocatedInLocalMemoryPool();
    }
    if (dstAllocation) {
        dstInLocalPool = dstAllocation->isAllocatedInLocalMemoryPool();
    }
    return (copyEngineOperation && (srcInLocalPool && !dstInLocalPool));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -404,6 +404,9 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::validateDispatchFlags(bool
bool AppendFillFixture::MockDriverFillHandle::findAllocationDataForRange(const void *buffer,
size_t size,
NEO::SvmAllocationData *&allocData) {
if ((size >= 15) && (size <= 17)) {
return false;
}
mockAllocation.reset(new NEO::MockGraphicsAllocation(const_cast<void *>(buffer), size));
data.gpuAllocations.addAllocation(mockAllocation.get());
allocData = &data;

View File

@@ -687,8 +687,8 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
uint32_t sizePerHwThread),
(kernel, sizePerHwThread));
AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocationData(device, buffer, bufferSize, allowHostCopy, copyOffload);
AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocationData(device, sharedSystemEnabled, buffer, bufferSize, allowHostCopy, copyOffload);
}
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,

View File

@@ -42,7 +42,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
MockCommandListHw() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
MockCommandListHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(), failOnFirstCopy(failOnFirst) {}
AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
getAlignedAllocationCalledTimes++;
if (buffer && !failAlignedAlloc) {
return {0, 0, &alignedAlloc, true};
@@ -1536,7 +1536,7 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily
public:
MockCommandListForRegionSize() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
return {0, 0, &mockAllocationPtr, true};
}
ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,

View File

@@ -223,7 +223,7 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationCalledWithInvalidPtrTh
size_t cmdListHostPtrSize = MemoryConstants::pageSize;
void *cmdListHostBuffer = reinterpret_cast<void *>(0x1234);
AlignedAllocationData outData = {};
outData = commandList->getAlignedAllocationData(device, cmdListHostBuffer, cmdListHostPtrSize, false, false);
outData = commandList->getAlignedAllocationData(device, false, cmdListHostBuffer, cmdListHostPtrSize, false, false);
EXPECT_EQ(nullptr, outData.alloc);
}
@@ -288,7 +288,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenGettingAlignedAl
void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize);
size_t expectedOffset = ptrDiff(startMemory, baseAddress);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false);
ASSERT_NE(nullptr, outData.alloc);
auto firstAlloc = outData.alloc;
auto expectedGpuAddress = static_cast<uintptr_t>(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize));
@@ -303,7 +303,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenGettingAlignedAl
expectedGpuAddress = ptrOffset(expectedGpuAddress, alignedOffset);
EXPECT_EQ(outData.offset + offset, expectedOffset);
outData = commandList->getAlignedAllocationData(device, offsetMemory, 4u, false, false);
outData = commandList->getAlignedAllocationData(device, false, offsetMemory, 4u, false, false);
ASSERT_NE(nullptr, outData.alloc);
EXPECT_EQ(firstAlloc, outData.alloc);
EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer());
@@ -329,7 +329,7 @@ HWTEST_F(CommandListCreateTests, givenCmdListHostPointerUsedWhenRemoveHostPtrAll
size_t cmdListHostPtrSize = MemoryConstants::pageSize;
void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, cmdListHostBuffer, cmdListHostPtrSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, cmdListHostBuffer, cmdListHostPtrSize, false, false);
ASSERT_NE(nullptr, outData.alloc);
for (const auto &engine : engines) {
@@ -1558,8 +1558,8 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationWhenInternalMemWithinD
commandList->initialize(myDevice.get(), NEO::EngineGroupType::copy, 0u);
auto buffer = std::make_unique<uint8_t>(0x100);
auto outData1 = commandList->getAlignedAllocationData(device, buffer.get(), 0x100, true, false);
auto outData2 = commandList->getAlignedAllocationData(device, &buffer.get()[5], 0x1, true, false);
auto outData1 = commandList->getAlignedAllocationData(device, false, buffer.get(), 0x100, true, false);
auto outData2 = commandList->getAlignedAllocationData(device, false, &buffer.get()[5], 0x1, true, false);
EXPECT_NE(outData1.alloc, outData2.alloc);
driverHandle->getMemoryManager()->freeGraphicsMemory(outData1.alloc);
driverHandle->getMemoryManager()->freeGraphicsMemory(outData2.alloc);
@@ -1572,8 +1572,8 @@ HWTEST_F(CommandListCreateTests, givenGetAlignedAllocationWhenExternalMemWithinD
commandList->initialize(myDevice.get(), NEO::EngineGroupType::copy, 0u);
auto buffer = std::make_unique<uint8_t>(0x100);
auto outData1 = commandList->getAlignedAllocationData(device, buffer.get(), 0x100, true, false);
auto outData2 = commandList->getAlignedAllocationData(device, &buffer.get()[5], 0x1, true, false);
auto outData1 = commandList->getAlignedAllocationData(device, false, buffer.get(), 0x100, true, false);
auto outData2 = commandList->getAlignedAllocationData(device, false, &buffer.get()[5], 0x1, true, false);
EXPECT_EQ(outData1.alloc, outData2.alloc);
driverHandle->getMemoryManager()->freeGraphicsMemory(outData1.alloc);
commandList->hostPtrMap.clear();

View File

@@ -946,7 +946,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingA
size_t offsetSize = 20;
void *offsetPointer = ptrOffset(importPointer, allocOffset);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, importPointer, importSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, importPointer, importSize, false, false);
auto gpuBaseAddress = static_cast<size_t>(hostAllocation->getGpuAddress());
auto expectedAlignedAddress = alignDown(gpuBaseAddress, NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
size_t expectedOffset = gpuBaseAddress - expectedAlignedAddress;
@@ -955,7 +955,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingA
EXPECT_EQ(hostAllocation, outData.alloc);
EXPECT_EQ(expectedOffset, outData.offset);
outData = commandList->getAlignedAllocationData(device, offsetPointer, offsetSize, false, false);
outData = commandList->getAlignedAllocationData(device, false, offsetPointer, offsetSize, false, false);
expectedOffset += allocOffset;
EXPECT_EQ(importPointer, hostAllocation->getUnderlyingBuffer());
EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr);
@@ -980,7 +980,7 @@ HWTEST_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingP
auto hostAllocation = hostDriverHandle->findHostPointerAllocation(offsetPointer, pointerSize, device->getRootDeviceIndex());
ASSERT_NE(nullptr, hostAllocation);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, offsetPointer, pointerSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, offsetPointer, pointerSize, false, false);
auto expectedAlignedAddress = static_cast<uintptr_t>(hostAllocation->getGpuAddress());
EXPECT_EQ(heapPointer, hostAllocation->getUnderlyingBuffer());
EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr);

View File

@@ -328,6 +328,40 @@ HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopy
commandList->cmdQImmediate = nullptr;
}
// Appends a memory copy between two arbitrary (non-allocated) addresses on an immediate
// render/compute command list with EnableSharedSystemUsmSupport,
// TreatNonUsmForTransfersAsSharedSystem and EmitMemAdvisePriorToCopyForNonUsm all set to 1.
// Expects success plus exactly one executeCommandLists and one synchronize call on the mocked queue.
HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopySharedSystemUsmThenSuccessIsReturned) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1);
    debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(1);

    ze_command_queue_desc_t queueDesc{};
    auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);

    using ImmediateCmdList = WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<ImmediateCmdList>();
    ASSERT_NE(nullptr, commandList);
    commandList->cmdListType = CommandList::CommandListType::typeImmediate;
    commandList->cmdQImmediate = queue.get();
    commandList->device = device;
    const ze_result_t initStatus = commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    // Fake addresses: neither pointer is backed by a USM allocation.
    auto *sourceAddress = reinterpret_cast<void *>(0x1234);
    auto *destinationAddress = reinterpret_cast<void *>(0x2345);

    const auto copyStatus = commandList->appendMemoryCopy(destinationAddress, sourceAddress, 8, nullptr, 0, nullptr, copyParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, copyStatus);

    EXPECT_EQ(1u, queue->executeCommandListsCalled);
    EXPECT_EQ(1u, queue->synchronizeCalled);

    // Detach the queue; it stays owned by the unique_ptr above.
    commandList->cmdQImmediate = nullptr;
}
HWTEST_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned) {
ze_command_queue_desc_t queueDesc = {};
auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
@@ -369,6 +403,37 @@ HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemor
commandList->getCsr(false)->getInternalAllocationStorage()->getTemporaryAllocations().freeAllGraphicsAllocations(device->getNEODevice());
}
// Copy-engine variant of the shared-system-USM memory copy test: the immediate command list is
// initialized on the copy engine group and EmitMemAdvisePriorToCopyForNonUsm is set to -1.
// Expects success, one executeCommandLists call and no synchronize call on the mocked queue.
HWTEST2_F(AppendMemoryCopyTests, givenImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineAndSharedSystemUsmThenSuccessIsReturned, IsNotXeHpgCore) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1);
    debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(-1);

    ze_command_queue_desc_t queueDesc{};
    auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);

    using ImmediateCmdList = WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<ImmediateCmdList>();
    ASSERT_NE(nullptr, commandList);
    commandList->cmdListType = CommandList::CommandListType::typeImmediate;
    commandList->cmdQImmediate = queue.get();
    commandList->device = device;
    const ze_result_t initStatus = commandList->initialize(device, NEO::EngineGroupType::copy, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    // Fake addresses: neither pointer is backed by a USM allocation.
    auto *sourceAddress = reinterpret_cast<void *>(0x1234);
    auto *destinationAddress = reinterpret_cast<void *>(0x2345);

    const auto copyStatus = commandList->appendMemoryCopy(destinationAddress, sourceAddress, 8, nullptr, 0, nullptr, copyParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, copyStatus);

    EXPECT_EQ(1u, queue->executeCommandListsCalled);
    EXPECT_EQ(0u, queue->synchronizeCalled);
}
HWTEST_F(AppendMemoryCopyTests, givenAsyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenProgramCmdStreamWithFlushTask) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

View File

@@ -34,7 +34,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
using BaseClass::getAllocationOffsetForAppendBlitFill;
AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
return {0, 0, nullptr, true};
}
ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr,
@@ -85,6 +85,26 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillToNotDe
EXPECT_EQ(ret, ZE_RESULT_ERROR_INVALID_ARGUMENT);
}
// Fills a plain malloc'ed (non-USM) buffer through a copy-only command list while
// EnableSharedSystemUsmSupport and TreatNonUsmForTransfersAsSharedSystem are set to 1 and the
// device capability mask is overridden. Expects appendMemoryFill to return success.
HWTEST_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillToSharedSystemUsmThenSuccessReturned) {
    MockCommandListForMemFill<FamilyType::gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::copy, 0u);

    // The flags are set after initialize() on purpose: this keeps the original ordering.
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1);

    auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
    sharedSystemMemCapabilities = 0xf; // enables return true for Device::areSharedSystemAllocationsAllowed()

    uint8_t pattern = 1;
    size_t size = 0x1000;
    void *ptr = malloc(size);
    // Stop here if the host allocation failed, so the fill is never exercised with a null
    // destination and a misleading result. Nothing needs freeing on this path.
    ASSERT_NE(nullptr, ptr);

    auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), size, nullptr, 0, nullptr, copyParams);
    // Non-fatal check so the buffer below is always released.
    EXPECT_EQ(ret, ZE_RESULT_SUCCESS);
    free(ptr);
}
using MemFillPlatforms = IsGen12LP;
HWTEST2_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillThenCopyBltIsProgrammed, MemFillPlatforms) {

View File

@@ -108,6 +108,91 @@ HWTEST_F(AppendFillTest, givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOn
delete[] ptr;
}
// Fills a heap buffer (plain new[], not a USM allocation) on a compute command list with
// EnableSharedSystemUsmSupport, TreatNonUsmForTransfersAsSharedSystem and
// EmitMemAdvisePriorToCopyForNonUsm all set to 1. Expects appendMemoryFill to succeed.
HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndMemAdviseThenReturnSuccess) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1);
    debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.set(1);

    using FillCmdList = WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<FillCmdList>();
    commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    int fillPattern = 0;
    constexpr size_t fillSize = 17;
    auto *dstBuffer = new uint8_t[fillSize];
    CmdListMemoryCopyParams fillParams{};

    // Only the first byte of the pattern is used (pattern size 1).
    const ze_result_t fillStatus = commandList->appendMemoryFill(dstBuffer, &fillPattern, 1, fillSize, nullptr, 0, nullptr, fillParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, fillStatus);

    delete[] dstBuffer;
}
// Same scenario as the mem-advise variant, but EmitMemAdvisePriorToCopyForNonUsm is left
// untouched: only EnableSharedSystemUsmSupport and TreatNonUsmForTransfersAsSharedSystem are
// set to 1. Expects appendMemoryFill on a plain new[] buffer to succeed.
HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndNoMemAdviseThenReturnSuccess) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(1);

    using FillCmdList = WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<FillCmdList>();
    commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    int fillPattern = 0;
    constexpr size_t fillSize = 17;
    auto *dstBuffer = new uint8_t[fillSize];
    CmdListMemoryCopyParams fillParams{};

    // Only the first byte of the pattern is used (pattern size 1).
    const ze_result_t fillStatus = commandList->appendMemoryFill(dstBuffer, &fillPattern, 1, fillSize, nullptr, 0, nullptr, fillParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, fillStatus);

    delete[] dstBuffer;
}
// EnableSharedSystemUsmSupport is set to 1 while TreatNonUsmForTransfersAsSharedSystem is set
// to -1. The fill of a plain new[] buffer is still expected to succeed (the test name calls
// this the legacy mode).
HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndTreatNonUsmForTransfersAsSharedSystemNotSetReturnSuccessLegacyMode) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(-1);

    using FillCmdList = WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<FillCmdList>();
    commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    int fillPattern = 0;
    constexpr size_t fillSize = 17;
    auto *dstBuffer = new uint8_t[fillSize];
    CmdListMemoryCopyParams fillParams{};

    // Only the first byte of the pattern is used (pattern size 1).
    const ze_result_t fillStatus = commandList->appendMemoryFill(dstBuffer, &fillPattern, 1, fillSize, nullptr, 0, nullptr, fillParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, fillStatus);

    delete[] dstBuffer;
}
// Both EnableSharedSystemUsmSupport and TreatNonUsmForTransfersAsSharedSystem are set to -1
// while the capability mask is still overridden to 0xf. Filling a plain new[] buffer is then
// expected to be rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST_F(AppendFillTest, givenAppendMemoryFillWithSharedSystemUsmAndNoDebugFlagsSetReturnError) {
    DebugManagerStateRestore debugFlagsGuard;
    debugManager.flags.EnableSharedSystemUsmSupport.set(-1);
    debugManager.flags.TreatNonUsmForTransfersAsSharedSystem.set(-1);

    using FillCmdList = WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>;
    auto commandList = std::make_unique<FillCmdList>();
    commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

    // Override the capability mask only after the command list has been initialized.
    // NOTE(review): VariableBackup is presumed to restore the original value at scope exit - confirm.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = 0xf;

    int fillPattern = 0;
    constexpr size_t fillSize = 17;
    auto *dstBuffer = new uint8_t[fillSize];
    CmdListMemoryCopyParams fillParams{};

    // Only the first byte of the pattern is used (pattern size 1).
    const ze_result_t fillStatus = commandList->appendMemoryFill(dstBuffer, &fillPattern, 1, fillSize, nullptr, 0, nullptr, fillParams);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, fillStatus);

    delete[] dstBuffer;
}
HWTEST_F(AppendFillTest, givenAppendMemoryFillWithUnalignedSizeWhenPatternSizeIsOneThenDispatchTwoKernels) {
auto commandList = std::make_unique<WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

View File

@@ -80,7 +80,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
MockCommandListExtensionHw() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
MockCommandListExtensionHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(), failOnFirstCopy(failOnFirst) {}
AlignedAllocationData getAlignedAllocationData(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
AlignedAllocationData getAlignedAllocationData(L0::Device *device, bool sharedSystemEnabled, const void *buffer, uint64_t bufferSize, bool allowHostCopy, bool copyOffload) override {
getAlignedAllocationCalledTimes++;
if (buffer) {
return {0, 0, &alignedAlloc, true};
@@ -701,7 +701,7 @@ HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithNoScopeAndSystemMem
void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize);
size_t expectedOffset = ptrDiff(startMemory, baseAddress);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false);
ASSERT_NE(nullptr, outData.alloc);
auto expectedGpuAddress = static_cast<uintptr_t>(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize));
EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer());
@@ -903,7 +903,7 @@ HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeCont
void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize);
size_t expectedOffset = ptrDiff(startMemory, baseAddress);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, startMemory, cmdListHostPtrSize, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device, false, startMemory, cmdListHostPtrSize, false, false);
ASSERT_NE(nullptr, outData.alloc);
auto expectedGpuAddress = static_cast<uintptr_t>(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize));
EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer());

View File

@@ -4199,7 +4199,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<FamilyType::gfxCoreFamily>>();
commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false);
EXPECT_EQ(nullptr, outData.alloc);
result = context->freeMem(ptr);
@@ -4223,7 +4223,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<FamilyType::gfxCoreFamily>>();
commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false);
EXPECT_NE(outData.alignedAllocationPtr, 0u);
result = context->freeMem(ptr);
@@ -4249,7 +4249,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenSharedAllocationPassedToGetAllig
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<FamilyType::gfxCoreFamily>>();
commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, ptr, size, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device1, false, ptr, size, false, false);
EXPECT_NE(outData.alignedAllocationPtr, 0u);
result = context->freeMem(ptr);
@@ -4273,7 +4273,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllig
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<FamilyType::gfxCoreFamily>>();
commandList->initialize(device0, NEO::EngineGroupType::renderCompute, 0u);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, ptr, size, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, false, ptr, size, false, false);
EXPECT_NE(outData.alignedAllocationPtr, 0u);
result = context->freeMem(ptr);
@@ -4299,7 +4299,7 @@ HWTEST_F(MultipleDevicePeerAllocationTest, givenSharedAllocationPassedToGetAllig
auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<FamilyType::gfxCoreFamily>>();
commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, ptr, size, false, false);
AlignedAllocationData outData = commandList->getAlignedAllocationData(device0, false, ptr, size, false, false);
EXPECT_NE(outData.alignedAllocationPtr, 0u);
result = context->freeMem(ptr);