mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
pass event host scope flag to walker when single packet events are used
Related-To: NEO-7434
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
946dd27125
commit
29eee35491
@@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams {
|
||||
bool isKernelSplitOperation = false;
|
||||
bool isBuiltInKernel = false;
|
||||
bool isDestinationAllocationInSystemMemory = false;
|
||||
bool isHostSignalScopeEvent = false;
|
||||
};
|
||||
|
||||
struct CmdListReturnPoint {
|
||||
|
||||
@@ -303,13 +303,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
return ret;
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
Event *event = nullptr;
|
||||
if (hEvent) {
|
||||
event = Event::fromHandle(hEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
||||
nullptr, launchParams);
|
||||
@@ -332,9 +333,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
return ret;
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
launchParams.isPredicate = true;
|
||||
|
||||
Event *event = nullptr;
|
||||
if (hEvent) {
|
||||
event = Event::fromHandle(hEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
@@ -346,9 +352,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
for (uint32_t i = 0; i < numKernels; i++) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
launchParams.isPredicate = true;
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
|
||||
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
|
||||
nullptr, launchParams);
|
||||
@@ -1183,17 +1186,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
}
|
||||
|
||||
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
|
||||
kernelCounter += middleSizeBytes > 0 ? 1 : 0;
|
||||
kernelCounter += rightSize > 0 ? 1 : 0;
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
launchParams.isKernelSplitOperation = kernelCounter > 1;
|
||||
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
|
||||
|
||||
@@ -1544,9 +1548,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
@@ -1596,7 +1603,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
}
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isBuiltInKernel = true;
|
||||
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
|
||||
|
||||
|
||||
@@ -167,7 +167,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
uint64_t eventAddress = 0;
|
||||
bool isTimestampEvent = false;
|
||||
bool l3FlushEnable = false;
|
||||
bool isHostSignalScopeEvent = false;
|
||||
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
|
||||
if (event) {
|
||||
eventAlloc = &event->getAllocation(this->device);
|
||||
commandContainer.addToResidencyContainer(eventAlloc);
|
||||
|
||||
@@ -1364,5 +1364,171 @@ HWTEST2_F(CreateCommandListXeHpcTest, whenFlagDisabledAndCreateImmediateCommandL
|
||||
EXPECT_TRUE(static_cast<DeviceImp *>(device)->allocationsForReuse->peekIsEmpty());
|
||||
}
|
||||
|
||||
struct AppendKernelXeHpcTestInput {
|
||||
DriverHandle *driver = nullptr;
|
||||
L0::Context *context = nullptr;
|
||||
L0::Device *device = nullptr;
|
||||
};
|
||||
|
||||
template <int32_t usePipeControlMultiPacketEventSync>
|
||||
struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModuleFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
|
||||
LocalMemoryModuleFixture::setUp();
|
||||
|
||||
input.driver = driverHandle.get();
|
||||
input.device = device;
|
||||
input.context = context;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
|
||||
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
|
||||
|
||||
constexpr size_t size = 4096u;
|
||||
constexpr size_t alignment = 4096u;
|
||||
void *ptr = nullptr;
|
||||
const void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
result = input.context->allocDeviceMem(input.device->toHandle(),
|
||||
&deviceDesc,
|
||||
size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
eventDesc.wait = 0;
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
constexpr size_t offset = 32;
|
||||
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
|
||||
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
GenCmdList commands;
|
||||
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
|
||||
commands,
|
||||
commandList->commandContainer.getCommandStream()->getCpuBase(),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
|
||||
EXPECT_NE(0u, itorWalkers.size());
|
||||
for (const auto &it : itorWalkers) {
|
||||
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
|
||||
result = input.context->freeMem(ptr);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testHostSignalScopeHostMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
|
||||
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
|
||||
|
||||
constexpr size_t size = 4096u;
|
||||
constexpr size_t alignment = 4096u;
|
||||
void *ptr = nullptr;
|
||||
const void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
result = input.context->allocHostMem(&hostDesc, size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
eventDesc.wait = 0;
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
constexpr size_t offset = 32;
|
||||
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
|
||||
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
GenCmdList commands;
|
||||
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
|
||||
commands,
|
||||
commandList->commandContainer.getCommandStream()->getCpuBase(),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
|
||||
EXPECT_NE(0u, itorWalkers.size());
|
||||
for (const auto &it : itorWalkers) {
|
||||
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
|
||||
result = input.context->freeMem(ptr);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
AppendKernelXeHpcTestInput input;
|
||||
};
|
||||
|
||||
// Test suite built on the fixture instantiated with 0: its setUp() writes 0 into
// DebugManager.flags.UsePipeControlMultiKernelEventSync before the base set-up runs.
using CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<0>>;
|
||||
|
||||
// Flag value 0, device-memory destination: delegates to the fixture method that
// copies into a device allocation with a host-signal-scope event and expects
// every parsed walker to report no system memory fence request.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
|
||||
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
|
||||
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
|
||||
}
|
||||
|
||||
// Flag value 0, host-memory destination: delegates to the fixture method that
// copies into a host allocation with a host-signal-scope event and expects
// every parsed walker to report a system memory fence request.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
|
||||
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
|
||||
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
|
||||
}
|
||||
|
||||
// Test suite built on the fixture instantiated with 1: its setUp() writes 1 into
// DebugManager.flags.UsePipeControlMultiKernelEventSync before the base set-up runs.
using CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<1>>;
|
||||
|
||||
// Flag value 1, device-memory destination: same check as the flag-0 variant -
// every parsed walker is expected to report no system memory fence request.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
|
||||
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
|
||||
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
|
||||
}
|
||||
|
||||
// Flag value 1, host-memory destination: same check as the flag-0 variant -
// every parsed walker is expected to report a system memory fence request.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
|
||||
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
|
||||
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user