Use system fence only when using system allocations or a host-scope signal event

Related-To: NEO-6959

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-05-27 16:47:43 +00:00
committed by Compute-Runtime-Automation
parent 9fa1c9d4cc
commit afceaa6e19
14 changed files with 1001 additions and 245 deletions

View File

@@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams {
bool isCooperative = false;
bool isKernelSplitOperation = false;
bool isBuiltInKernel = false;
// Set by built-in kernel paths to report that the destination allocation is
// host/system memory; read for built-in kernels when deciding whether the
// dispatch is flagged as using a system allocation.
bool isDestinationAllocationInSystemMemory = false;
};
struct CommandList : _ze_command_list_handle_t {

View File

@@ -630,8 +630,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
pSrcRegion->depth / groupSizeZ};
// Classify the destination allocation so the launch can be flagged as
// writing to system memory.
auto dstAllocationType = allocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
// Destination counts as system memory when it is a host buffer or an
// external host pointer.
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
hEvent, numWaitEvents, phWaitEvents, launchParams);
@@ -862,9 +866,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
// Classify the destination allocation so the split built-in copy can be
// flagged as writing to system memory.
auto dstAllocationType = dstPtrAlloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
launchParams.isKernelSplitOperation = true;
launchParams.isBuiltInKernel = true;
// Destination counts as system memory when it is a host buffer, an SVM_CPU
// allocation, or an external host pointer.
// NOTE(review): SVM_CPU is matched only on this path; the other built-in
// paths visible in this change check two types — confirm this is intended.
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::SVM_CPU) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, launchParams);
}
@@ -1298,8 +1308,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
// Classify the destination allocation so the 3D built-in copy can be
// flagged as writing to system memory.
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
// Destination counts as system memory when it is a host buffer or an
// external host pointer.
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams);
}
@@ -1354,8 +1368,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
// Classify the destination allocation so the 2D built-in copy can be
// flagged as writing to system memory.
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
// Destination counts as system memory when it is a host buffer or an
// external host pointer.
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),
&dispatchFuncArgs, hSignalEvent,
numWaitEvents,
@@ -1429,6 +1447,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
// Launch parameters for the built-in fill kernel, marked as a split operation.
CmdListKernelLaunchParams launchParams = {};
launchParams.isKernelSplitOperation = true;
launchParams.isBuiltInKernel = true;
// Reuses hostPointerNeedsFlush (computed outside this view) as the
// "destination is system memory" indicator.
// NOTE(review): presumably that flag is true only for host-pointer
// destinations — confirm against where it is set.
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
if (patternSize == 1) {
Kernel *builtinFunction = nullptr;
@@ -2129,8 +2148,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
// Classify the destination allocation so the built-in timestamp-query
// kernel can be flagged as writing to system memory.
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
// Destination counts as system memory when it is a host buffer or an
// external host pointer.
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams);
if (appendResult != ZE_RESULT_SUCCESS) {

View File

@@ -131,7 +131,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->containsStatelessUncachedResource, // requiresUncachedMocs
false, // useGlobalAtomics
internalUsage, // isInternal
launchParams.isCooperative // isCooperative
launchParams.isCooperative, // isCooperative
false, // isHostScopeSignalEvent
false // isKernelUsingSystemAllocation
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);

View File

@@ -157,6 +157,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
uint64_t eventAddress = 0;
bool isTimestampEvent = false;
bool l3FlushEnable = false;
bool isHostSignalScopeEvent = false;
if (hEvent) {
auto event = Event::fromHandle(hEvent);
eventAlloc = &event->getAllocation(this->device);
@@ -166,6 +167,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
l3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(flushRequired, hwInfo);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
// Decide whether this dispatch touches system (host) memory; the result is
// later passed to the encoder as isKernelUsingSystemAllocation.
bool isKernelUsingSystemAllocation = false;
if (!launchParams.isBuiltInKernel) {
// Non-built-in kernel: scan every allocation in its residency container.
auto &kernelAllocations = kernel->getResidencyContainer();
for (auto &allocation : kernelAllocations) {
// Null entries in the container are skipped.
if (allocation == nullptr) {
continue;
}
// NOTE(review): only BUFFER_HOST_MEMORY is matched here, while the
// built-in paths visible in this change also treat EXTERNAL_HOST_PTR
// (and SVM_CPU in one case) as system memory — confirm this is intended.
// The scan also keeps iterating after the first match.
if (allocation->getAllocationType() == NEO::AllocationType::BUFFER_HOST_MEMORY) {
isKernelUsingSystemAllocation = true;
}
}
} else {
// Built-in kernel: use the destination classification supplied through
// launchParams instead of scanning the residency container.
isKernelUsingSystemAllocation = launchParams.isDestinationAllocationInSystemMemory;
}
if (kernel->hasIndirectAllocationsAllowed()) {
@@ -176,6 +193,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
}
if (unifiedMemoryControls.indirectHostAllocationsAllowed) {
this->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
isKernelUsingSystemAllocation = true;
}
if (unifiedMemoryControls.indirectSharedAllocationsAllowed) {
this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
@@ -227,7 +245,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->containsStatelessUncachedResource, // requiresUncachedMocs
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
internalUsage, // isInternal
launchParams.isCooperative // isCooperative
launchParams.isCooperative, // isCooperative
isHostSignalScopeEvent, // isHostScopeSignalEvent
isKernelUsingSystemAllocation // isKernelUsingSystemAllocation
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;