Include events from BCS split in relaxed ordering dependencies
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent 4585b82ada
commit bbc13485d3
|
@ -361,7 +361,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
|||
return performCpuMemcpy(dstptr, srcptr, size, dstAllocData, srcAllocData, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
if (this->isAppendSplitNeeded(dstptr, srcptr, size)) {
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size);
|
||||
if (isSplitNeeded) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, true, (numWaitEvents > 0), [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, numWaitEvents, phWaitEvents);
|
||||
});
|
||||
|
@ -369,7 +370,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
|||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
}
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0) || isSplitNeeded, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -393,7 +394,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
|||
|
||||
ze_result_t ret;
|
||||
|
||||
if (this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch))) {
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
|
||||
if (isSplitNeeded) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, true, (numWaitEvents > 0), [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
ze_copy_region_t dstRegionLocal = {};
|
||||
ze_copy_region_t srcRegionLocal = {};
|
||||
|
@ -413,7 +415,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
|||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0) || isSplitNeeded, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -467,7 +469,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
|||
|
||||
ze_result_t ret;
|
||||
|
||||
if (this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size)) {
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size);
|
||||
if (isSplitNeeded) {
|
||||
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
|
||||
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, false, false, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
|
@ -479,7 +482,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
|||
} else {
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
||||
}
|
||||
return flushImmediate(ret, false, false, false, nullptr);
|
||||
return flushImmediate(ret, false, false, isSplitNeeded, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
|
@ -472,6 +473,8 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
|
|||
size, alignment, &srcPtr);
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList0->csr);
|
||||
ultCsr->recordFlusheBatchBuffer = true;
|
||||
|
||||
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
@ -483,6 +486,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
|
|||
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u);
|
||||
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
|
||||
|
||||
context->freeMem(srcPtr);
|
||||
context->freeMem(dstPtr);
|
||||
|
|
Loading…
Reference in New Issue