From 816b23f0ba8737034ba851d4e8725fe8b9144088 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Fri, 13 Jan 2023 13:30:59 +0000 Subject: [PATCH] Add event synchronization sandbox test Related-To: NEO-7490 Signed-off-by: Zbigniew Zdanowicz --- .../black_box_tests/common/zello_common.cpp | 99 ++++-- .../black_box_tests/common/zello_common.h | 7 +- .../test/black_box_tests/zello_copy_only.cpp | 22 +- .../test/black_box_tests/zello_immediate.cpp | 8 +- .../test/black_box_tests/zello_sandbox.cpp | 293 +++++++++++++++++- 5 files changed, 367 insertions(+), 62 deletions(-) diff --git a/level_zero/core/test/black_box_tests/common/zello_common.cpp b/level_zero/core/test/black_box_tests/common/zello_common.cpp index e040c92d6f..475f2b3741 100644 --- a/level_zero/core/test/black_box_tests/common/zello_common.cpp +++ b/level_zero/core/test/black_box_tests/common/zello_common.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -65,7 +65,9 @@ bool isCircularDepTest(int argc, char *argv[]) { return false; } - std::cerr << "Circular Dependency Test mode detected" << std::endl; + if (verbose) { + std::cout << "Circular Dependency Test mode detected" << std::endl; + } return true; } @@ -76,7 +78,7 @@ bool isVerbose(int argc, char *argv[]) { return false; } - std::cerr << "Verbose mode detected" << std::endl; + std::cout << "Verbose mode detected" << std::endl; return true; } @@ -84,11 +86,15 @@ bool isVerbose(int argc, char *argv[]) { bool isSyncQueueEnabled(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-s", "--sync"); if (enabled == false) { - std::cerr << "Async Queue detected" << std::endl; + if (verbose) { + std::cout << "Async Queue detected" << std::endl; + } return false; } - std::cerr << "Sync Queue detected" << std::endl; + if (verbose) { + std::cout << "Sync Queue detected" << std::endl; + } return true; } @@ -96,11 +102,15 @@ bool 
isSyncQueueEnabled(int argc, char *argv[]) { bool isAsyncQueueEnabled(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-as", "--async"); if (enabled == false) { - std::cerr << "Sync Queue detected" << std::endl; + if (verbose) { + std::cout << "Sync Queue detected" << std::endl; + } return false; } - std::cerr << "Async Queue detected" << std::endl; + if (verbose) { + std::cout << "Async Queue detected" << std::endl; + } return true; } @@ -112,7 +122,7 @@ bool isAubMode(int argc, char *argv[]) { } if (verbose) { - std::cerr << "Aub mode detected" << std::endl; + std::cout << "Aub mode detected" << std::endl; } return true; @@ -124,7 +134,9 @@ bool isCommandListShared(int argc, char *argv[]) { return false; } - std::cerr << "Command List shared between tests" << std::endl; + if (verbose) { + std::cout << "Command List shared between tests" << std::endl; + } return true; } @@ -132,8 +144,12 @@ bool isCommandListShared(int argc, char *argv[]) { bool isImmediateFirst(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-i", "--immediate"); - if (verbose && enabled) { - std::cerr << "Immediate Command List executed first" << std::endl; + if (verbose) { + if (enabled) { + std::cout << "Immediate Command List executed first" << std::endl; + } else { + std::cout << "Regular Command List executed first" << std::endl; + } } return enabled; @@ -141,14 +157,16 @@ bool isImmediateFirst(int argc, char *argv[]) { bool getAllocationFlag(int argc, char *argv[], int defaultValue) { int value = getParamValue(argc, argv, "-A", "-allocflag", defaultValue); - std::cerr << "Allocation flag "; - if (value != defaultValue) { - std::cerr << "override "; - } else { - std::cerr << "default "; + if (verbose) { + std::cout << "Allocation flag "; + if (value != defaultValue) { + std::cout << "override "; + } else { + std::cout << "default "; + } + std::bitset<4> bitValue(value); + std::cout << "value 0b" << bitValue << std::endl; } - std::bitset<4> 
bitValue(value); - std::cerr << "value 0b" << bitValue << std::endl; return value; } @@ -173,22 +191,29 @@ uint32_t getBufferLength(int argc, char *argv[], uint32_t defaultLength) { return defaultLength; } - std::cerr << "Buffer length detected = " << length << std::endl; + if (verbose) { + std::cout << "Buffer length detected = " << length << std::endl; + } return length; } void printResult(bool aubMode, bool outputValidationSuccessful, const std::string &blackBoxName, const std::string &currentTest) { + std::cout << std::endl + << blackBoxName; + if (!currentTest.empty()) { + std::cout << " " << currentTest; + } + if (aubMode == false) { - std::cout << std::endl - << blackBoxName; - if (!currentTest.empty()) { - std::cout << " " << currentTest; - } std::cout << " Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << std::endl << std::endl; + } else { + std::cout << " Results validation disabled in aub mode." + << std::endl + << std::endl; } } @@ -201,7 +226,7 @@ uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) { uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { - std::cout << "No queue groups found!\n"; + std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups); @@ -217,17 +242,17 @@ uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) { return computeQueueGroupOrdinal; } -int32_t getCopyOnlyCommandQueueOrdinal(ze_device_handle_t &device) { +uint32_t getCopyOnlyCommandQueueOrdinal(ze_device_handle_t &device) { uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { - std::cout << "No queue groups found!\n"; + std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups); SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, 
&numQueueGroups, queueProperties.data())); - int32_t copyOnlyQueueGroupOrdinal = -1; + uint32_t copyOnlyQueueGroupOrdinal = std::numeric_limits<uint32_t>::max(); for (uint32_t i = 0; i < numQueueGroups; i++) { if (!(queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) && (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { copyOnlyQueueGroupOrdinal = i; @@ -262,17 +287,21 @@ ze_command_queue_handle_t createCommandQueue(ze_context_handle_t &context, ze_de return createCommandQueue(context, device, ordinal, ZE_COMMAND_QUEUE_MODE_DEFAULT, ZE_COMMAND_QUEUE_PRIORITY_NORMAL); } -ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) { +ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList, uint32_t ordinal) { ze_command_list_desc_t descriptor = {}; descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; descriptor.pNext = nullptr; descriptor.flags = 0; - descriptor.commandQueueGroupOrdinal = getCommandQueueOrdinal(device); + descriptor.commandQueueGroupOrdinal = ordinal; return zeCommandListCreate(context, device, &descriptor, &cmdList); } +ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) { + return createCommandList(context, device, cmdList, getCommandQueueOrdinal(device)); +} + void createEventPoolAndEvents(ze_context_handle_t &context, ze_device_handle_t &device, ze_event_pool_handle_t &eventPool, @@ -295,12 +324,14 @@ void createEventPoolAndEvents(ze_context_handle_t &context, } } -std::vector<ze_device_handle_t> zelloGetSubDevices(ze_device_handle_t &device, int &subDevCount) { +std::vector<ze_device_handle_t> zelloGetSubDevices(ze_device_handle_t &device, uint32_t &subDevCount) { uint32_t deviceCount = 0; std::vector<ze_device_handle_t> subdevs(deviceCount, nullptr); SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(device, &deviceCount, nullptr)); if (deviceCount == 0) { - std::cout << "No sub 
device found!\n"; + if (verbose) { + std::cout << "No sub device found!\n"; + } subDevCount = 0; return subdevs; } @@ -316,7 +347,7 @@ std::vector<ze_device_handle_t> zelloInitContextAndGetDevices(ze_context_handle_ uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); if (driverCount == 0) { - std::cout << "No driver handle found!\n"; + std::cerr << "No driver handle found!\n"; std::terminate(); } @@ -327,7 +358,7 @@ std::vector<ze_device_handle_t> zelloInitContextAndGetDevices(ze_context_handle_ uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); if (deviceCount == 0) { - std::cout << "No device found!\n"; + std::cerr << "No device found!\n"; std::terminate(); } std::vector<ze_device_handle_t> devices(deviceCount, nullptr); diff --git a/level_zero/core/test/black_box_tests/common/zello_common.h b/level_zero/core/test/black_box_tests/common/zello_common.h index 4976df6fb9..ede54f4dd8 100644 --- a/level_zero/core/test/black_box_tests/common/zello_common.h +++ b/level_zero/core/test/black_box_tests/common/zello_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -78,7 +78,7 @@ void printResult(bool aubMode, bool outputValidationSuccessful, const std::strin uint32_t getCommandQueueOrdinal(ze_device_handle_t &device); -int32_t getCopyOnlyCommandQueueOrdinal(ze_device_handle_t &device); +uint32_t getCopyOnlyCommandQueueOrdinal(ze_device_handle_t &device); ze_command_queue_handle_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, uint32_t *ordinal, ze_command_queue_mode_t mode, @@ -87,6 +87,7 @@ ze_command_queue_handle_t createCommandQueue(ze_context_handle_t &context, ze_de ze_command_queue_handle_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, uint32_t *ordinal); ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList); 
+ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList, uint32_t ordinal); void createEventPoolAndEvents(ze_context_handle_t &context, ze_device_handle_t &device, @@ -97,7 +98,7 @@ void createEventPoolAndEvents(ze_context_handle_t &context, ze_event_scope_flag_t signalScope, ze_event_scope_flag_t waitScope); -std::vector<ze_device_handle_t> zelloGetSubDevices(ze_device_handle_t &device, int &subDevCount); +std::vector<ze_device_handle_t> zelloGetSubDevices(ze_device_handle_t &device, uint32_t &subDevCount); std::vector<ze_device_handle_t> zelloInitContextAndGetDevices(ze_context_handle_t &context, ze_driver_handle_t &driverHandle); diff --git a/level_zero/core/test/black_box_tests/zello_copy_only.cpp b/level_zero/core/test/black_box_tests/zello_copy_only.cpp index 8d9cef6ceb..0e35c84b23 100644 --- a/level_zero/core/test/black_box_tests/zello_copy_only.cpp +++ b/level_zero/core/test/black_box_tests/zello_copy_only.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ void testCopyBetweenHeapDeviceAndStack(ze_context_handle_t &context, ze_device_h ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); - if (copyQueueGroup < 0) { + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { std::cout << "No Copy queue group found. 
Skipping test run\n"; // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) validRet = true; return; @@ -101,8 +101,8 @@ void testCopyBetweenHostMemAndDeviceMem(ze_context_handle_t &context, ze_device_ ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); - if (copyQueueGroup < 0) { + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { std::cout << "No Copy queue group found. Skipping test run\n"; // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) validRet = true; return; @@ -172,8 +172,8 @@ void testRegionCopyOf2DSharedMem(ze_context_handle_t &context, ze_device_handle_ ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); - if (copyQueueGroup < 0) { + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { std::cout << "No Copy queue group found. Skipping test run\n"; validRet = true; return; @@ -304,8 +304,8 @@ void testSharedMemDataAccessWithoutCopy(ze_context_handle_t &context, ze_device_ ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); - if (copyQueueGroup < 0) { + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { std::cout << "No Copy queue group found. 
Skipping test run\n"; // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) validRet = true; return; @@ -414,8 +414,8 @@ void testRegionCopyOf3DSharedMem(ze_context_handle_t &context, ze_device_handle_ ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); - if (copyQueueGroup < 0) { + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { std::cout << "No Copy queue group found. Skipping test run\n"; validRet = true; return; diff --git a/level_zero/core/test/black_box_tests/zello_immediate.cpp b/level_zero/core/test/black_box_tests/zello_immediate.cpp index a12391055a..15588525d0 100644 --- a/level_zero/core/test/black_box_tests/zello_immediate.cpp +++ b/level_zero/core/test/black_box_tests/zello_immediate.cpp @@ -267,12 +267,12 @@ int main(int argc, char *argv[]) { } // Find copy queue in root device, if not found, try subdevices - int32_t copyQueueGroup = 0; + uint32_t copyQueueGroup = 0; bool copyQueueFound = false; auto copyQueueDev = devices[0]; for (auto &rd : devices) { copyQueueGroup = getCopyOnlyCommandQueueOrdinal(rd); - if (copyQueueGroup >= 0) { + if (copyQueueGroup != std::numeric_limits<uint32_t>::max()) { copyQueueFound = true; copyQueueDev = rd; if (verbose) { @@ -288,7 +288,7 @@ int main(int argc, char *argv[]) { } copyQueueGroup = 0; for (auto &rd : devices) { - int subDevCount = 0; + uint32_t subDevCount = 0; auto subdevs = zelloGetSubDevices(rd, subDevCount); if (!subDevCount) { @@ -298,7 +298,7 @@ int main(int argc, char *argv[]) { // Find subdev that has a copy engine. 
If not skip tests for (auto &sd : subdevs) { copyQueueGroup = getCopyOnlyCommandQueueOrdinal(sd); - if (copyQueueGroup >= 0) { + if (copyQueueGroup != std::numeric_limits<uint32_t>::max()) { copyQueueFound = true; copyQueueDev = sd; break; diff --git a/level_zero/core/test/black_box_tests/zello_sandbox.cpp b/level_zero/core/test/black_box_tests/zello_sandbox.cpp index 87bdda79e7..9596d6fce9 100644 --- a/level_zero/core/test/black_box_tests/zello_sandbox.cpp +++ b/level_zero/core/test/black_box_tests/zello_sandbox.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -135,6 +135,252 @@ std::string testMemoryTransfer5xString(bool asyncMode, bool immediateFirst, bool return testStream.str(); } +void executeEventSyncForMultiTileAndCopy(ze_context_handle_t &context, ze_device_handle_t &device, + uint32_t flags, bool useImmediate, bool &outputValidationSuccessful) { + uint32_t numEvents = 10; + + ze_event_pool_handle_t eventPool = nullptr; + ze_event_pool_flag_t eventPoolFlags = static_cast<ze_event_pool_flag_t>(flags); + + ze_event_pool_desc_t eventPoolDesc{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; + eventPoolDesc.count = numEvents; + eventPoolDesc.flags = eventPoolFlags; + + std::vector<ze_device_handle_t> eventPoolDevices; + ze_device_handle_t subDevice = nullptr; + ze_device_handle_t copyDevice = nullptr; + + std::vector<ze_event_handle_t> events(numEvents, nullptr); + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; + + ze_command_queue_handle_t cmdQueue = nullptr; + ze_command_list_handle_t cmdList = nullptr; + + ze_command_queue_handle_t cmdQueueCopy = nullptr; + ze_command_list_handle_t cmdListCopy = nullptr; + + ze_command_queue_handle_t cmdQueueSubDevice = nullptr; + ze_command_list_handle_t cmdListSubDevice = nullptr; + + eventPoolDevices.push_back(device); + + uint32_t queueGroup = getCommandQueueOrdinal(device); + uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); + uint32_t subDeviceCopyQueueGroup = 
std::numeric_limits<uint32_t>::max(); + + ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; + cmdQueueDesc.ordinal = queueGroup; + cmdQueueDesc.index = 0; + selectQueueMode(cmdQueueDesc, false); + if (useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList)); + } else { + SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList, queueGroup)); + } + + uint32_t subDevCount = 0; + auto subDevices = zelloGetSubDevices(device, subDevCount); + if (subDevCount == 0) { + if (verbose) { + std::cout << "Skipping multi-tile - subdevice compute sync" << std::endl; + } + } else { + subDevice = subDevices[0]; + eventPoolDevices.push_back(subDevice); + uint32_t subDeviceQueueGroup = getCommandQueueOrdinal(subDevice); + + subDeviceCopyQueueGroup = getCopyOnlyCommandQueueOrdinal(subDevice); + if (subDeviceCopyQueueGroup != std::numeric_limits<uint32_t>::max()) { + copyQueueGroup = subDeviceCopyQueueGroup; + } + + cmdQueueDesc.ordinal = subDeviceQueueGroup; + if (useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, subDevice, &cmdQueueDesc, &cmdListSubDevice)); + } else { + SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, subDevice, &cmdQueueDesc, &cmdQueueSubDevice)); + SUCCESS_OR_TERMINATE(createCommandList(context, subDevice, cmdListSubDevice, subDeviceQueueGroup)); + } + } + + if (copyQueueGroup == std::numeric_limits<uint32_t>::max()) { + if (verbose) { + std::cout << "Skipping compute - copy sync" << std::endl; + } + } else { + copyDevice = device; + if (subDeviceCopyQueueGroup != std::numeric_limits<uint32_t>::max()) { + copyDevice = subDevice; + if (verbose) { + std::cout << "Using subdevice for copy engine" << std::endl; + } + } + + cmdQueueDesc.ordinal = copyQueueGroup; + if (useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, copyDevice, &cmdQueueDesc, &cmdListCopy)); + } else { + 
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, copyDevice, &cmdQueueDesc, &cmdQueueCopy)); + SUCCESS_OR_TERMINATE(createCommandList(context, copyDevice, cmdListCopy, copyQueueGroup)); + } + } + + uint32_t eventIndex = 0; + bool createEvents = (subDevice != nullptr) || (copyDevice != nullptr); + + if (createEvents) { + uint32_t eventPoolDevicesNum = static_cast<uint32_t>(eventPoolDevices.size()); + auto eventPtr = events.data(); + SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, eventPoolDevicesNum, eventPoolDevices.data(), &eventPool)); + for (uint32_t i = 0; i < numEvents; i++) { + eventDesc.index = i; + SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, eventPtr + i)); + } + } + + if (subDevice) { + if (verbose) { + std::cout << "Running multi-tile - subdevice compute sync" << std::endl; + } + auto fromRootToSubEvent = events[eventIndex++]; + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdList, fromRootToSubEvent)); + auto subEvent = events[eventIndex++]; + + SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdListSubDevice, 1, &fromRootToSubEvent)); + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdListSubDevice, subEvent)); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListSubDevice)); + + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueSubDevice, 1, &cmdListSubDevice, nullptr)); + } + + SUCCESS_OR_TERMINATE(zeEventHostSynchronize(subEvent, std::numeric_limits<uint64_t>::max())); + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueSubDevice, std::numeric_limits<uint64_t>::max())); + + SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListReset(cmdListSubDevice)); + } + + auto fromSubToRootEvent = events[eventIndex++]; + 
SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdListSubDevice, fromSubToRootEvent)); + auto rootEvent = events[eventIndex++]; + + SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdList, 1, &fromSubToRootEvent)); + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdList, rootEvent)); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListSubDevice)); + + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueSubDevice, 1, &cmdListSubDevice, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + } + + SUCCESS_OR_TERMINATE(zeEventHostSynchronize(rootEvent, std::numeric_limits<uint64_t>::max())); + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueSubDevice, std::numeric_limits<uint64_t>::max())); + + SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); + } + } + + if (copyDevice) { + if (verbose) { + std::cout << "Running compute - copy sync" << std::endl; + } + auto fromComputeToCopyEvent = events[eventIndex++]; + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdList, fromComputeToCopyEvent)); + auto copyEvent = events[eventIndex++]; + + SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdListCopy, 1, &fromComputeToCopyEvent)); + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdListCopy, copyEvent)); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListCopy)); + + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueCopy, 1, &cmdListCopy, nullptr)); + } + + SUCCESS_OR_TERMINATE(zeEventHostSynchronize(copyEvent, std::numeric_limits<uint64_t>::max())); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, 
std::numeric_limits<uint64_t>::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueCopy, std::numeric_limits<uint64_t>::max())); + + SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListReset(cmdListCopy)); + } + + auto fromCopyToComputeEvent = events[eventIndex++]; + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdListCopy, fromCopyToComputeEvent)); + auto computeEvent = events[eventIndex++]; + + SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdList, 1, &fromCopyToComputeEvent)); + SUCCESS_OR_TERMINATE(zeCommandListAppendSignalEvent(cmdList, computeEvent)); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListCopy)); + + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueCopy, 1, &cmdListCopy, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + } + + SUCCESS_OR_TERMINATE(zeEventHostSynchronize(computeEvent, std::numeric_limits<uint64_t>::max())); + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueCopy, std::numeric_limits<uint64_t>::max())); + } + } + + if (createEvents) { + for (uint32_t i = 0; i < numEvents; i++) { + SUCCESS_OR_TERMINATE(zeEventDestroy(events[i])); + } + SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); + } + if (cmdListCopy) { + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListCopy)); + } + if (cmdQueueCopy) { + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueCopy)); + } + if (cmdListSubDevice) { + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListSubDevice)); + } + if (cmdQueueSubDevice) { + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueSubDevice)); + } + if (cmdList) { + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + } + if (cmdQueue) { + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); + } +} + +std::string 
testEventSyncForMultiTileAndCopy(bool immediate, bool tsEvent) { + std::ostringstream testStream; + + testStream << "Event Sync For Multi-Tile And Copy " + << (immediate ? "immediate" : "regular") + << " command list" + << " and event pool flags: " + << (tsEvent ? "ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP" : "ZE_EVENT_POOL_FLAG_HOST_VISIBLE"); + + return testStream.str(); +} + using TestBitMask = std::bitset<32>; TestBitMask getTestMask(int argc, char *argv[], uint32_t defaultValue) { @@ -153,6 +399,7 @@ TestBitMask getTestMask(int argc, char *argv[], uint32_t defaultValue) { int main(int argc, char *argv[]) { constexpr uint32_t bitNumberTestMemoryTransfer5x = 0u; + constexpr uint32_t bitNumberTestEventSyncForMultiTileAndCopy = 1u; const std::string blackBoxName = "Zello Sandbox"; std::string currentTest; @@ -172,51 +419,77 @@ int main(int argc, char *argv[]) { SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); printDeviceProperties(deviceProperties); + uint32_t testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; if (testMask.test(bitNumberTestMemoryTransfer5x)) { bool useImmediate = immediateFirst; currentTest = testMemoryTransfer5xString(asyncMode, useImmediate, false); - uint32_t testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; executeMemoryTransferAndValidate(context, device, testFlag, useImmediate, asyncMode, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); if (outputValidationSuccessful || aubMode) { - printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); - currentTest = testMemoryTransfer5xString(asyncMode, useImmediate, true); testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; executeMemoryTransferAndValidate(context, device, testFlag, useImmediate, asyncMode, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); } useImmediate = !useImmediate; if 
(outputValidationSuccessful || aubMode) { - printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); - currentTest = testMemoryTransfer5xString(asyncMode, useImmediate, false); testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; executeMemoryTransferAndValidate(context, device, testFlag, useImmediate, asyncMode, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); } if (outputValidationSuccessful || aubMode) { - printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); - currentTest = testMemoryTransfer5xString(asyncMode, useImmediate, true); testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; executeMemoryTransferAndValidate(context, device, testFlag, useImmediate, asyncMode, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); + } + } + + if (testMask.test(bitNumberTestEventSyncForMultiTileAndCopy)) { + bool useImmediate = true; + if (outputValidationSuccessful || aubMode) { + currentTest = testEventSyncForMultiTileAndCopy(useImmediate, false); + testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + executeEventSyncForMultiTileAndCopy(context, device, testFlag, useImmediate, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); + } + if (outputValidationSuccessful || aubMode) { + currentTest = testEventSyncForMultiTileAndCopy(useImmediate, true); + testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + executeEventSyncForMultiTileAndCopy(context, device, testFlag, useImmediate, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); + } + + useImmediate = false; + if (outputValidationSuccessful || aubMode) { + currentTest = testEventSyncForMultiTileAndCopy(useImmediate, false); + testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + executeEventSyncForMultiTileAndCopy(context, 
device, testFlag, useImmediate, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); + } + if (outputValidationSuccessful || aubMode) { + currentTest = testEventSyncForMultiTileAndCopy(useImmediate, true); + testFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + executeEventSyncForMultiTileAndCopy(context, device, testFlag, useImmediate, outputValidationSuccessful); + printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); } } SUCCESS_OR_TERMINATE(zeContextDestroy(context)); - printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); - outputValidationSuccessful = aubMode ? true : outputValidationSuccessful; return outputValidationSuccessful ? 0 : 1; }