diff --git a/opencl/extensions/public/cl_ext_private.h b/opencl/extensions/public/cl_ext_private.h index a65ee3712d..65f4645640 100644 --- a/opencl/extensions/public/cl_ext_private.h +++ b/opencl/extensions/public/cl_ext_private.h @@ -143,3 +143,36 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield; /* cl_queue_properties */ #define CL_QUEUE_SLICE_COUNT_INTEL 0x10021 + +/****************************** +* QUEUE FAMILY SELECTING * +*******************************/ + +/* cl_device_info */ +#define CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL 0x10030 +#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL 0x10031 + +/* cl_queue_properties */ +#define CL_QUEUE_FAMILY_INTEL 0x10032 +#define CL_QUEUE_INDEX_INTEL 0x10033 +#define CL_QUEUE_CAPABILITY_EVENT_WAIT_LIST_INTEL (1 << 0) +#define CL_QUEUE_CAPABILITY_EVENTS_INTEL (1 << 1) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL (1 << 2) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL (1 << 3) +#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL (1 << 4) +#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL (1 << 5) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL (1 << 6) +#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL (1 << 7) +#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL (1 << 8) +#define CL_QUEUE_CAPABILITY_MARKER_INTEL (1 << 9) +#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 10) +#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 11) +#define CL_QUEUE_CAPABILITY_ALL_INTEL 0xFFFFFFFFFFFFFFFFULL + +typedef cl_bitfield cl_command_queue_capabilities_intel; + +typedef struct _cl_queue_family_properties_intel { + cl_command_queue_properties properties; + cl_command_queue_capabilities_intel capabilities; + cl_uint count; +} cl_queue_family_properties_intel; diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index c845f12970..e32bee0397 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -4882,6 +4882,8 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte tokenValue != 
CL_QUEUE_PRIORITY_KHR && tokenValue != CL_QUEUE_THROTTLE_KHR && tokenValue != CL_QUEUE_SLICE_COUNT_INTEL && + tokenValue != CL_QUEUE_FAMILY_INTEL && + tokenValue != CL_QUEUE_INDEX_INTEL && !isExtraToken(propertiesAddress)) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); @@ -4956,6 +4958,23 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte return commandQueue; } + bool queueFamilySelected = false; + bool queueSelected = false; + const auto queueFamilyIndex = getCmdQueueProperties(properties, CL_QUEUE_FAMILY_INTEL, &queueFamilySelected); + const auto queueIndex = getCmdQueueProperties(properties, CL_QUEUE_INDEX_INTEL, &queueSelected); + if (queueFamilySelected != queueSelected) { + err.set(CL_INVALID_QUEUE_PROPERTIES); + TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); + return commandQueue; + } + if (queueFamilySelected && + (queueFamilyIndex >= pDevice->getDeviceInfo().queueFamilyProperties.size() || + queueIndex >= pDevice->getDeviceInfo().queueFamilyProperties[queueFamilyIndex].count)) { + err.set(CL_INVALID_QUEUE_PROPERTIES); + TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); + return commandQueue; + } + auto maskedFlags = commandQueueProperties & minimumCreateDeviceQueueFlags; if (maskedFlags == minimumCreateDeviceQueueFlags) { diff --git a/opencl/source/cl_device/cl_device_caps.cpp b/opencl/source/cl_device/cl_device_caps.cpp index 03e7187f51..4ad91b17ca 100644 --- a/opencl/source/cl_device/cl_device_caps.cpp +++ b/opencl/source/cl_device/cl_device_caps.cpp @@ -360,6 +360,26 @@ void ClDevice::initializeCaps() { } } + const std::vector> &queueFamilies = this->getDevice().getEngineGroups(); + if (queueFamilies.size() > 0) { + for (int queueFamilyIndex = 0; queueFamilyIndex < static_cast(EngineGroupType::MaxEngineGroups); queueFamilyIndex++) { + const std::vector &enginesInFamily = queueFamilies.at(queueFamilyIndex); + if (enginesInFamily.size() > 0) { 
+ cl_queue_family_properties_intel properties; + properties.capabilities = CL_QUEUE_CAPABILITY_ALL_INTEL; + properties.count = static_cast(enginesInFamily.size()); + properties.properties = deviceInfo.queueOnHostProperties; + deviceInfo.queueFamilyProperties.push_back(properties); + } + } + } else { + cl_queue_family_properties_intel properties; + properties.capabilities = CL_QUEUE_CAPABILITY_ALL_INTEL; + properties.count = 1; + properties.properties = deviceInfo.queueOnHostProperties; + deviceInfo.queueFamilyProperties.push_back(properties); + } + deviceInfo.preemptionSupported = false; deviceInfo.maxGlobalVariableSize = ocl21FeaturesEnabled ? 64 * KB : 0; deviceInfo.globalVariablePreferredTotalSize = ocl21FeaturesEnabled ? static_cast(sharedDeviceInfo.maxMemAllocSize) : 0; diff --git a/opencl/source/cl_device/cl_device_info.cpp b/opencl/source/cl_device/cl_device_info.cpp index d450ee3ebd..34a735eae9 100644 --- a/opencl/source/cl_device/cl_device_info.cpp +++ b/opencl/source/cl_device/cl_device_info.cpp @@ -188,12 +188,21 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName, src = ¶m; } break; + case CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL: + srcSize = retSize = sizeof(cl_uint); + param = static_cast(deviceInfo.queueFamilyProperties.size()); + src = ¶m; + break; case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: if (simultaneousInterops.size() > 1u) { srcSize = retSize = sizeof(cl_uint) * simultaneousInterops.size(); src = &simultaneousInterops[0]; } break; + case CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL: + src = deviceInfo.queueFamilyProperties.data(); + retSize = srcSize = deviceInfo.queueFamilyProperties.size() * sizeof(cl_queue_family_properties_intel); + break; case CL_DEVICE_REFERENCE_COUNT: { cl_int ref = this->getReference(); DEBUG_BREAK_IF(ref != 1 && !deviceInfo.parentDevice); diff --git a/opencl/source/cl_device/cl_device_info.h b/opencl/source/cl_device/cl_device_info.h index 30f4f253b8..c6c522645b 100644 --- a/opencl/source/cl_device/cl_device_info.h +++ 
b/opencl/source/cl_device/cl_device_info.h @@ -11,6 +11,8 @@ #include "opencl/extensions/public/cl_ext_private.h" +#include "engine_group_types.h" + #include namespace NEO { @@ -19,116 +21,117 @@ using OpenClCFeaturesContainer = StackVec; // clang-format off struct ClDeviceInfo { - cl_name_version ilsWithVersion[1]; - StackVec builtInKernelsWithVersion; - StackVec openclCAllVersions; - OpenClCFeaturesContainer openclCFeatures; - std::vector extensionsWithVersion; - cl_device_type deviceType; - size_t maxSliceCount; - size_t image3DMaxWidth; - size_t image3DMaxHeight; - size_t maxBufferSize; - size_t maxArraySize; - cl_device_fp_config singleFpConfig; - cl_device_fp_config halfFpConfig; - cl_device_fp_config doubleFpConfig; - cl_ulong globalMemCacheSize; - cl_ulong maxConstantBufferSize; - size_t maxGlobalVariableSize; - size_t globalVariablePreferredTotalSize; - size_t preferredWorkGroupSizeMultiple; - cl_device_exec_capabilities executionCapabilities; - cl_command_queue_properties queueOnHostProperties; - cl_command_queue_properties queueOnDeviceProperties; - const char *builtInKernels; - cl_platform_id platform; - const char *name; - const char *vendor; - const char *driverVersion; - const char *profile; - const char *clVersion; - const char *clCVersion; - const char *spirVersions; - const char *deviceExtensions; - const char *latestConformanceVersionPassed; - cl_device_id parentDevice; - cl_device_affinity_domain partitionAffinityDomain; - cl_uint partitionMaxSubDevices; - cl_device_partition_property partitionProperties[2]; - cl_device_partition_property partitionType[3]; - cl_device_svm_capabilities svmCapabilities; - double platformHostTimerResolution; - size_t planarYuvMaxWidth; - size_t planarYuvMaxHeight; - cl_version numericClVersion; - cl_uint maxComputUnits; - cl_uint maxWorkItemDimensions; - cl_uint maxNumOfSubGroups; - cl_bool independentForwardProgress; - cl_device_atomic_capabilities atomicMemoryCapabilities; - cl_device_atomic_capabilities 
atomicFenceCapabilities; - cl_bool nonUniformWorkGroupSupport; - cl_bool workGroupCollectiveFunctionsSupport; - cl_bool genericAddressSpaceSupport; - cl_device_device_enqueue_capabilities deviceEnqueueSupport; - cl_bool pipeSupport; - cl_uint preferredVectorWidthChar; - cl_uint preferredVectorWidthShort; - cl_uint preferredVectorWidthInt; - cl_uint preferredVectorWidthLong; - cl_uint preferredVectorWidthFloat; - cl_uint preferredVectorWidthDouble; - cl_uint preferredVectorWidthHalf; - cl_uint nativeVectorWidthChar; - cl_uint nativeVectorWidthShort; - cl_uint nativeVectorWidthInt; - cl_uint nativeVectorWidthLong; - cl_uint nativeVectorWidthFloat; - cl_uint nativeVectorWidthDouble; - cl_uint nativeVectorWidthHalf; - cl_uint maxReadWriteImageArgs; - cl_uint imagePitchAlignment; - cl_uint imageBaseAddressAlignment; - cl_uint maxPipeArgs; - cl_uint pipeMaxActiveReservations; - cl_uint pipeMaxPacketSize; - cl_uint memBaseAddressAlign; - cl_uint minDataTypeAlignSize; - cl_device_mem_cache_type globalMemCacheType; - cl_uint maxConstantArgs; - cl_device_local_mem_type localMemType; - cl_bool endianLittle; - cl_bool deviceAvailable; - cl_bool compilerAvailable; - cl_bool linkerAvailable; - cl_uint queueOnDevicePreferredSize; - cl_uint queueOnDeviceMaxSize; - cl_uint maxOnDeviceQueues; - cl_uint maxOnDeviceEvents; - cl_bool preferredInteropUserSync; - cl_uint referenceCount; - cl_uint preferredPlatformAtomicAlignment; - cl_uint preferredGlobalAtomicAlignment; - cl_uint preferredLocalAtomicAlignment; - cl_bool hostUnifiedMemory; - cl_bool vmeAvcSupportsTextureSampler; - cl_uint vmeAvcVersion; - cl_uint vmeVersion; - cl_uint internalDriverVersion; - cl_uint grfSize; - bool preemptionSupported; + cl_name_version ilsWithVersion[1]; + StackVec builtInKernelsWithVersion; + StackVec openclCAllVersions; + OpenClCFeaturesContainer openclCFeatures; + std::vector extensionsWithVersion; + cl_device_type deviceType; + size_t maxSliceCount; + size_t image3DMaxWidth; + size_t 
image3DMaxHeight; + size_t maxBufferSize; + size_t maxArraySize; + cl_device_fp_config singleFpConfig; + cl_device_fp_config halfFpConfig; + cl_device_fp_config doubleFpConfig; + cl_ulong globalMemCacheSize; + cl_ulong maxConstantBufferSize; + size_t maxGlobalVariableSize; + size_t globalVariablePreferredTotalSize; + size_t preferredWorkGroupSizeMultiple; + cl_device_exec_capabilities executionCapabilities; + cl_command_queue_properties queueOnHostProperties; + cl_command_queue_properties queueOnDeviceProperties; + const char *builtInKernels; + cl_platform_id platform; + const char *name; + const char *vendor; + const char *driverVersion; + const char *profile; + const char *clVersion; + const char *clCVersion; + const char *spirVersions; + const char *deviceExtensions; + const char *latestConformanceVersionPassed; + cl_device_id parentDevice; + cl_device_affinity_domain partitionAffinityDomain; + cl_uint partitionMaxSubDevices; + cl_device_partition_property partitionProperties[2]; + cl_device_partition_property partitionType[3]; + cl_device_svm_capabilities svmCapabilities; + StackVec(EngineGroupType::MaxEngineGroups)> queueFamilyProperties; + double platformHostTimerResolution; + size_t planarYuvMaxWidth; + size_t planarYuvMaxHeight; + cl_version numericClVersion; + cl_uint maxComputUnits; + cl_uint maxWorkItemDimensions; + cl_uint maxNumOfSubGroups; + cl_bool independentForwardProgress; + cl_device_atomic_capabilities atomicMemoryCapabilities; + cl_device_atomic_capabilities atomicFenceCapabilities; + cl_bool nonUniformWorkGroupSupport; + cl_bool workGroupCollectiveFunctionsSupport; + cl_bool genericAddressSpaceSupport; + cl_device_device_enqueue_capabilities deviceEnqueueSupport; + cl_bool pipeSupport; + cl_uint preferredVectorWidthChar; + cl_uint preferredVectorWidthShort; + cl_uint preferredVectorWidthInt; + cl_uint preferredVectorWidthLong; + cl_uint preferredVectorWidthFloat; + cl_uint preferredVectorWidthDouble; + cl_uint preferredVectorWidthHalf; + 
cl_uint nativeVectorWidthChar; + cl_uint nativeVectorWidthShort; + cl_uint nativeVectorWidthInt; + cl_uint nativeVectorWidthLong; + cl_uint nativeVectorWidthFloat; + cl_uint nativeVectorWidthDouble; + cl_uint nativeVectorWidthHalf; + cl_uint maxReadWriteImageArgs; + cl_uint imagePitchAlignment; + cl_uint imageBaseAddressAlignment; + cl_uint maxPipeArgs; + cl_uint pipeMaxActiveReservations; + cl_uint pipeMaxPacketSize; + cl_uint memBaseAddressAlign; + cl_uint minDataTypeAlignSize; + cl_device_mem_cache_type globalMemCacheType; + cl_uint maxConstantArgs; + cl_device_local_mem_type localMemType; + cl_bool endianLittle; + cl_bool deviceAvailable; + cl_bool compilerAvailable; + cl_bool linkerAvailable; + cl_uint queueOnDevicePreferredSize; + cl_uint queueOnDeviceMaxSize; + cl_uint maxOnDeviceQueues; + cl_uint maxOnDeviceEvents; + cl_bool preferredInteropUserSync; + cl_uint referenceCount; + cl_uint preferredPlatformAtomicAlignment; + cl_uint preferredGlobalAtomicAlignment; + cl_uint preferredLocalAtomicAlignment; + cl_bool hostUnifiedMemory; + cl_bool vmeAvcSupportsTextureSampler; + cl_uint vmeAvcVersion; + cl_uint vmeVersion; + cl_uint internalDriverVersion; + cl_uint grfSize; + bool preemptionSupported; /* Extensions supported */ - bool nv12Extension; - bool vmeExtension; - bool platformLP; - bool packedYuvExtension; + bool nv12Extension; + bool vmeExtension; + bool platformLP; + bool packedYuvExtension; /*Unified Shared Memory Capabilites*/ - cl_unified_shared_memory_capabilities_intel hostMemCapabilities; - cl_unified_shared_memory_capabilities_intel deviceMemCapabilities; - cl_unified_shared_memory_capabilities_intel singleDeviceSharedMemCapabilities; - cl_unified_shared_memory_capabilities_intel crossDeviceSharedMemCapabilities; - cl_unified_shared_memory_capabilities_intel sharedSystemMemCapabilities; + cl_unified_shared_memory_capabilities_intel hostMemCapabilities; + cl_unified_shared_memory_capabilities_intel deviceMemCapabilities; + 
cl_unified_shared_memory_capabilities_intel singleDeviceSharedMemCapabilities; + cl_unified_shared_memory_capabilities_intel crossDeviceSharedMemCapabilities; + cl_unified_shared_memory_capabilities_intel sharedSystemMemCapabilities; }; // clang-format on diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index a95ea614b0..e61c0e633f 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -37,6 +37,7 @@ #include "CL/cl_ext.h" +#include #include namespace NEO { @@ -705,9 +706,46 @@ void CommandQueue::storeProperties(const cl_queue_properties *properties) { } void CommandQueue::processProperties(const cl_queue_properties *properties) { + if (properties != nullptr) { + bool specificEngineSelected = false; + cl_uint selectedQueueFamilyIndex = std::numeric_limits::max(); + cl_uint selectedQueueIndex = std::numeric_limits::max(); + + for (auto currentProperties = properties; *currentProperties != 0; currentProperties += 2) { + switch (*currentProperties) { + case CL_QUEUE_FAMILY_INTEL: + selectedQueueFamilyIndex = static_cast(*(currentProperties + 1)); + specificEngineSelected = true; + break; + case CL_QUEUE_INDEX_INTEL: + selectedQueueIndex = static_cast(*(currentProperties + 1)); + specificEngineSelected = true; + break; + } + } + + if (specificEngineSelected) { + if (getDevice().getNumAvailableDevices() == 1) { + auto queueFamily = getDevice().getNonEmptyEngineGroup(selectedQueueFamilyIndex); + auto engine = queueFamily->at(selectedQueueIndex); + auto engineType = engine.getEngineType(); + this->overrideEngine(engineType); + } + } + } processPropertiesExtra(properties); } +void CommandQueue::overrideEngine(aub_stream::EngineType engineType) { + if (engineType == aub_stream::EngineType::ENGINE_BCS) { + bcsEngine = &device->getEngine(engineType, false, false); + timestampPacketContainer = std::make_unique(); + isCopyOnly = true; + } else { + gpgpuEngine = 
&device->getEngine(engineType, false, false); + } +} + void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) { if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 04076c4ac9..72f43c8086 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -340,6 +340,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void storeProperties(const cl_queue_properties *properties); void processProperties(const cl_queue_properties *properties); void processPropertiesExtra(const cl_queue_properties *properties); + void overrideEngine(aub_stream::EngineType engineType); bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); diff --git a/opencl/source/helpers/queue_helpers.h b/opencl/source/helpers/queue_helpers.h index 5b64adc7f3..69770cbb09 100644 --- a/opencl/source/helpers/queue_helpers.h +++ b/opencl/source/helpers/queue_helpers.h @@ -136,16 +136,23 @@ void getQueueInfo(cl_command_queue commandQueue, template returnType getCmdQueueProperties(const cl_queue_properties *properties, - cl_queue_properties propertyName = CL_QUEUE_PROPERTIES) { + cl_queue_properties propertyName = CL_QUEUE_PROPERTIES, + bool *foundValue = nullptr) { if (properties != nullptr) { while (*properties != 0) { if (*properties == propertyName) { + if (foundValue) { + *foundValue = true; + } return static_cast(*(properties + 1)); } properties += 2; } } + if (foundValue) { + *foundValue = false; + } return 0; } bool isExtraToken(const cl_queue_properties *property); diff --git 
a/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp b/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp index daf448e528..4501290951 100644 --- a/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp +++ b/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp @@ -482,6 +482,74 @@ TEST_F(clCreateCommandQueueWithPropertiesApi, GivenDeviceQueueCreatedWithVarious } } +TEST_F(clCreateCommandQueueWithPropertiesApi, givenQueueFamilySelectedAndNotIndexWhenCreatingQueueThenFail) { + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + 0, + 0, + }; + + auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); + EXPECT_EQ(nullptr, queue); + EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); +} + +TEST_F(clCreateCommandQueueWithPropertiesApi, givenQueueIndexSelectedAndNotFamilyWhenCreatingQueueThenFail) { + cl_queue_properties queueProperties[] = { + CL_QUEUE_INDEX_INTEL, + 0, + 0, + }; + + auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); + EXPECT_EQ(nullptr, queue); + EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); +} + +TEST_F(clCreateCommandQueueWithPropertiesApi, givenValidFamilyAndIndexSelectedWhenCreatingQueueThenReturnSuccess) { + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + 0, + CL_QUEUE_INDEX_INTEL, + 0, + 0, + }; + + auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); + EXPECT_NE(nullptr, queue); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(queue)); +} + +TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidQueueFamilySelectedWhenCreatingQueueThenFail) { + const auto &families = castToObject(testedClDevice)->getDevice().getEngineGroups(); + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + families.size(), + CL_QUEUE_INDEX_INTEL, + 0, + 
0, + }; + + auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); + EXPECT_EQ(nullptr, queue); + EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); +} + +TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidQueueIndexSelectedWhenCreatingQueueThenFail) { + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + 0, + CL_QUEUE_INDEX_INTEL, + 50, + 0, + }; + + auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); + EXPECT_EQ(nullptr, queue); + EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); +} + using LowPriorityCommandQueueTest = ::testing::Test; HWTEST_F(LowPriorityCommandQueueTest, GivenDeviceWithSubdevicesWhenCreatingLowPriorityCommandQueueThenEngineFromFirstSubdeviceIsTaken) { DebugManagerStateRestore restorer; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index d50605330d..f17abddff5 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -26,6 +26,7 @@ #include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" +#include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" @@ -35,6 +36,7 @@ #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" +#include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" @@ -1273,3 +1275,128 @@ HWTEST_F(KernelExecutionTypesTests, 
givenConcurrentKernelWhileDoingBlockedEnqueu EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent); mockCmdQ->isQueueBlocked(); } + +struct CommandQueueOnSpecificEngineTests : ::testing::Test { + static void fillProperties(cl_queue_properties *properties, cl_uint queueFamily, cl_uint queueIndex) { + properties[0] = CL_QUEUE_FAMILY_INTEL; + properties[1] = queueFamily; + properties[2] = CL_QUEUE_INDEX_INTEL; + properties[3] = queueIndex; + properties[4] = 0; + } + + template + class MockHwHelper : public HwHelperHw { + public: + const HwHelper::EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override { + HwHelper::EngineInstancesContainer result{}; + for (int i = 0; i < ccsCount; i++) { + result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); + } + for (int i = 0; i < bcsCount; i++) { + result.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular}); + } + return result; + } + + void addEngineToEngineGroup(std::vector> &engineGroups, + EngineControl &engine, const HardwareInfo &hwInfo) const override { + switch (engine.getEngineType()) { + case aub_stream::ENGINE_CCS: + engineGroups[static_cast(EngineGroupType::Compute)].push_back(engine); + break; + case aub_stream::ENGINE_BCS: + engineGroups[static_cast(EngineGroupType::Copy)].push_back(engine); + break; + default: + break; + } + } + }; + + template + auto overrideHwHelper() { + return RAIIHwHelperFactory{::defaultHwInfo->platform.eRenderCoreFamily}; + } +}; + +HWTEST_F(CommandQueueOnSpecificEngineTests, givenMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseCorrectEngine) { + auto raiiHwHelper = overrideHwHelper>(); + MockContext context{}; + cl_command_queue_properties properties[5] = {}; + + fillProperties(properties, 0, 0); + EngineControl &engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, false, false); + MockCommandQueue queueRcs(&context, context.getDevice(0), properties); + EXPECT_EQ(&engineCcs, 
&queueRcs.getGpgpuEngine()); + EXPECT_FALSE(queueRcs.isCopyOnly); + + fillProperties(properties, 1, 0); + EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, false, false); + MockCommandQueue queueBcs(&context, context.getDevice(0), properties); + EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver()); + EXPECT_TRUE(queueBcs.isCopyOnly); + EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); +} + +HWTEST_F(CommandQueueOnSpecificEngineTests, givenRootDeviceAndMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseDefaultEngine) { + auto raiiHwHelper = overrideHwHelper>(); + UltClDeviceFactory deviceFactory{1, 2}; + MockContext context{deviceFactory.rootDevices[0]}; + cl_command_queue_properties properties[5] = {}; + + fillProperties(properties, 0, 0); + EngineControl &defaultEngine = context.getDevice(0)->getDefaultEngine(); + MockCommandQueue defaultQueue(&context, context.getDevice(0), properties); + EXPECT_EQ(&defaultEngine, &defaultQueue.getGpgpuEngine()); + EXPECT_FALSE(defaultQueue.isCopyOnly); +} + +HWTEST_F(CommandQueueOnSpecificEngineTests, givenSubDeviceAndMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseDefaultEngine) { + auto raiiHwHelper = overrideHwHelper>(); + UltClDeviceFactory deviceFactory{1, 2}; + MockContext context{deviceFactory.subDevices[0]}; + cl_command_queue_properties properties[5] = {}; + + fillProperties(properties, 0, 0); + EngineControl &engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, false, false); + MockCommandQueue queueRcs(&context, context.getDevice(0), properties); + EXPECT_EQ(&engineCcs, &queueRcs.getGpgpuEngine()); + EXPECT_FALSE(queueRcs.isCopyOnly); + + fillProperties(properties, 1, 0); + EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, false, false); + MockCommandQueue queueBcs(&context, context.getDevice(0), properties); + EXPECT_EQ(engineBcs.commandStreamReceiver, 
queueBcs.getBcsCommandStreamReceiver()); + EXPECT_TRUE(queueBcs.isCopyOnly); + EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); +} + +HWTEST_F(CommandQueueOnSpecificEngineTests, givenBcsFamilySelectedWhenCreatingQueueOnSpecificEngineThenInitializeBcsProperly) { + auto raiiHwHelper = overrideHwHelper>(); + MockContext context{}; + cl_command_queue_properties properties[5] = {}; + + fillProperties(properties, 0, 0); + EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, false, false); + MockCommandQueue queueBcs(&context, context.getDevice(0), properties); + EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver()); + EXPECT_TRUE(queueBcs.isCopyOnly); + EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); +} + +HWTEST_F(CommandQueueOnSpecificEngineTests, givenBliterDisabledAndBcsFamilySelectedWhenCreatingQueueOnSpecificEngineThenInitializeBcsProperly) { + DebugManagerStateRestore restore{}; + DebugManager.flags.EnableBlitterOperationsSupport.set(0); + + auto raiiHwHelper = overrideHwHelper>(); + MockContext context{}; + cl_command_queue_properties properties[5] = {}; + + fillProperties(properties, 0, 0); + EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, false, false); + MockCommandQueue queueBcs(&context, context.getDevice(0), properties); + EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver()); + EXPECT_TRUE(queueBcs.isCopyOnly); + EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); +} diff --git a/opencl/test/unit_test/device/device_tests.cpp b/opencl/test/unit_test/device/device_tests.cpp index b5a78fc414..94d4c5c810 100644 --- a/opencl/test/unit_test/device/device_tests.cpp +++ b/opencl/test/unit_test/device/device_tests.cpp @@ -431,3 +431,38 @@ TEST(DeviceGenEngineTest, givenCreatedDeviceWhenRetrievingDefaultEngineThenOsCon auto &defaultEngine = device->getDefaultEngine(); 
EXPECT_TRUE(defaultEngine.osContext->isDefaultContext()); } + +TEST(DeviceGenEngineTest, givenNoEmptyGroupsWhenGettingNonEmptyGroupsThenReturnCorrectResults) { + const auto nonEmptyEngineGroup = std::vector{EngineControl{nullptr, nullptr}}; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + auto &engineGroups = device->getEngineGroups(); + engineGroups.clear(); + engineGroups.push_back(nonEmptyEngineGroup); + engineGroups.push_back(nonEmptyEngineGroup); + engineGroups.push_back(nonEmptyEngineGroup); + engineGroups.push_back(nonEmptyEngineGroup); + + EXPECT_EQ(&engineGroups[0], device->getNonEmptyEngineGroup(0)); + EXPECT_EQ(&engineGroups[1], device->getNonEmptyEngineGroup(1)); + EXPECT_EQ(&engineGroups[2], device->getNonEmptyEngineGroup(2)); + EXPECT_EQ(&engineGroups[3], device->getNonEmptyEngineGroup(3)); + EXPECT_EQ(nullptr, device->getNonEmptyEngineGroup(4)); +} + +TEST(DeviceGenEngineTest, givenEmptyGroupsWhenGettingNonEmptyGroupsThenReturnCorrectResults) { + const auto emptyEngineGroup = std::vector{}; + const auto nonEmptyEngineGroup = std::vector{EngineControl{nullptr, nullptr}}; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); + auto &engineGroups = device->getEngineGroups(); + engineGroups.clear(); + engineGroups.push_back(emptyEngineGroup); + engineGroups.push_back(nonEmptyEngineGroup); + engineGroups.push_back(emptyEngineGroup); + engineGroups.push_back(nonEmptyEngineGroup); + + EXPECT_EQ(&engineGroups[1], device->getNonEmptyEngineGroup(0)); + EXPECT_EQ(&engineGroups[3], device->getNonEmptyEngineGroup(1)); + EXPECT_EQ(nullptr, device->getNonEmptyEngineGroup(2)); +} diff --git a/opencl/test/unit_test/device/get_device_info_tests.cpp b/opencl/test/unit_test/device/get_device_info_tests.cpp index 28c1e4f982..1505504708 100644 --- a/opencl/test/unit_test/device/get_device_info_tests.cpp +++ b/opencl/test/unit_test/device/get_device_info_tests.cpp @@ -6,10 +6,13 @@ */ #include 
"shared/source/helpers/get_info.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" +#include "opencl/test/unit_test/helpers/raii_hw_helper.h" +#include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "test.h" @@ -677,6 +680,111 @@ TEST(GetDeviceInfo, WhenQueryingGenericAddressSpaceSupportThenProperValueIsRetur EXPECT_EQ(expectedGenericAddressSpaceSupport, genericAddressSpaceSupport); } /* Test double derived from HwHelperHw: reports ccsCount CCS engines followed by bcsCount BCS engines, and files CCS engines under the Compute group and BCS engines under the Copy group. NOTE(review): the template parameter list and every other angle-bracket argument in this text appear to have been stripped during extraction - restore them from the original commit before applying. */ +template +class MockHwHelper : public HwHelperHw { + public: /* Builds the engine list: ccsCount regular CCS entries first, then bcsCount regular BCS entries. hwInfo is not read. */ + const HwHelper::EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override { + HwHelper::EngineInstancesContainer result{}; + for (int i = 0; i < ccsCount; i++) { + result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); + } + for (int i = 0; i < bcsCount; i++) { + result.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular}); + } + return result; + } + /* Appends the engine to the Compute group (CCS) or the Copy group (BCS); any other engine type is ignored. */ + void addEngineToEngineGroup(std::vector> &engineGroups, + EngineControl &engine, const HardwareInfo &hwInfo) const override { + switch (engine.getEngineType()) { + case aub_stream::ENGINE_CCS: + engineGroups[static_cast(EngineGroupType::Compute)].push_back(engine); + break; + case aub_stream::ENGINE_BCS: + engineGroups[static_cast(EngineGroupType::Copy)].push_back(engine); + break; + default: + break; + } + } + /* Returns a RAIIHwHelperFactory built for the render core family of ::defaultHwInfo; callers keep the returned object alive for the duration of the test. Presumably it swaps this mock in and restores the previous helper on destruction - confirm in raii_hw_helper.h. */ + static auto overrideHwHelper() { + return RAIIHwHelperFactory>{::defaultHwInfo->platform.eRenderCoreFamily}; + } +}; + +using GetDeviceInfoQueueFamilyTest = ::testing::Test; + /* Root device from a factory built with {1, 0}, mock helper installed: expects two queue families, the first with count 3 and the second with count 1, both reporting CL_QUEUE_CAPABILITY_ALL_INTEL and the device's queueOnHostProperties. */ +HWTEST_F(GetDeviceInfoQueueFamilyTest, givenSingleDeviceWhenInitializingCapsThenReturnCorrectFamilies) { + auto raiiHwHelper = MockHwHelper::overrideHwHelper(); + UltClDeviceFactory deviceFactory{1, 0}; + ClDevice &clDevice = *deviceFactory.rootDevices[0]; + size_t 
paramRetSize{}; + + cl_uint numQueueFamilies{}; + auto retVal = clDevice.getDeviceInfo(CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL, sizeof(numQueueFamilies), &numQueueFamilies, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(2u, numQueueFamilies); + + cl_queue_family_properties_intel families[static_cast(EngineGroupType::MaxEngineGroups)]; + retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); /* the returned byte size divided by the struct size is the number of families written */ + EXPECT_EQ(2u, paramRetSize / sizeof(cl_queue_family_properties_intel)); + + EXPECT_EQ(CL_QUEUE_CAPABILITY_ALL_INTEL, families[0].capabilities); + EXPECT_EQ(3u, families[0].count); + EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); + + EXPECT_EQ(CL_QUEUE_CAPABILITY_ALL_INTEL, families[1].capabilities); + EXPECT_EQ(1u, families[1].count); + EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[1].properties); +} + /* Same expectations as the single-device case, queried on subDevices[1] of a factory built with {1, 2}. */ +HWTEST_F(GetDeviceInfoQueueFamilyTest, givenSubDeviceWhenInitializingCapsThenReturnCorrectFamilies) { + auto raiiHwHelper = MockHwHelper::overrideHwHelper(); + UltClDeviceFactory deviceFactory{1, 2}; + ClDevice &clDevice = *deviceFactory.subDevices[1]; + size_t paramRetSize{}; + + cl_uint numQueueFamilies{}; + auto retVal = clDevice.getDeviceInfo(CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL, sizeof(numQueueFamilies), &numQueueFamilies, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(2u, numQueueFamilies); + + cl_queue_family_properties_intel families[static_cast(EngineGroupType::MaxEngineGroups)]; + retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(2u, paramRetSize / sizeof(cl_queue_family_properties_intel)); + + EXPECT_EQ(CL_QUEUE_CAPABILITY_ALL_INTEL, families[0].capabilities); + EXPECT_EQ(3u, families[0].count); + EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); + + 
EXPECT_EQ(CL_QUEUE_CAPABILITY_ALL_INTEL, families[1].capabilities); + EXPECT_EQ(1u, families[1].count); + EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[1].properties); +} + /* Root device of a factory built with {1, 2}, without the mock helper: expects exactly one family with count 1. */ +HWTEST_F(GetDeviceInfoQueueFamilyTest, givenDeviceRootDeviceWhenInitializingCapsThenReturnDefaultFamily) { + UltClDeviceFactory deviceFactory{1, 2}; + ClDevice &clDevice = *deviceFactory.rootDevices[0]; + size_t paramRetSize{}; + + cl_uint numQueueFamilies{}; + auto retVal = clDevice.getDeviceInfo(CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL, sizeof(numQueueFamilies), &numQueueFamilies, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(1u, numQueueFamilies); + + cl_queue_family_properties_intel families[static_cast(EngineGroupType::MaxEngineGroups)]; + retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(1u, paramRetSize / sizeof(cl_queue_family_properties_intel)); + + EXPECT_EQ(CL_QUEUE_CAPABILITY_ALL_INTEL, families[0].capabilities); + EXPECT_EQ(1u, families[0].count); + EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); +} + struct GetDeviceInfo : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); @@ -769,6 +877,7 @@ cl_device_info deviceInfoParams[] = { CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, CL_DEVICE_NUMERIC_VERSION, + CL_DEVICE_NUM_QUEUE_FAMILIES_INTEL, CL_DEVICE_OPENCL_C_ALL_VERSIONS, CL_DEVICE_OPENCL_C_FEATURES, CL_DEVICE_OPENCL_C_VERSION, @@ -795,6 +904,7 @@ cl_device_info deviceInfoParams[] = { CL_DEVICE_PRINTF_BUFFER_SIZE, CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION, + CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, diff --git a/opencl/test/unit_test/helpers/queue_helpers_tests.cpp b/opencl/test/unit_test/helpers/queue_helpers_tests.cpp index 
02722aaf2d..bd8d8226e0 100644 --- a/opencl/test/unit_test/helpers/queue_helpers_tests.cpp +++ b/opencl/test/unit_test/helpers/queue_helpers_tests.cpp @@ -45,3 +45,25 @@ TEST(QueueHelpersTest, givenPropertyListWithPropertyOfValueZeroWhenGettingProper } } } + /* Exercises getCmdQueueProperties on a flat key/value list (presumably terminated by the trailing 0 - confirm against the helper): a listed key returns the value stored right after it and sets the found flag, including a key whose value is 0; an unlisted key returns 0 and the flag stays false. */ +TEST(QueueHelpersTest, givenPropertiesWhenGettingPropertyValuesThenReturnCorrectFoundPropertyValue) { + cl_queue_properties nonExistantProperty = 0xCC; + cl_queue_properties properties[] = { + 0xAA, + 3, + 0xBB, + 0, + 0}; + + bool foundProperty = false; + EXPECT_EQ(properties[1], getCmdQueueProperties(properties, properties[0], &foundProperty)); + EXPECT_TRUE(foundProperty); + /* key 0xBB maps to value 0, so only the found flag distinguishes it from a missing key */ + foundProperty = false; + EXPECT_EQ(properties[3], getCmdQueueProperties(properties, properties[2], &foundProperty)); + EXPECT_TRUE(foundProperty); + + foundProperty = false; + EXPECT_EQ(0u, getCmdQueueProperties(properties, nonExistantProperty, &foundProperty)); + EXPECT_FALSE(foundProperty); +} diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 72baaeca0b..d552a7c45c 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -215,6 +215,23 @@ bool Device::isDebuggerActive() const { return deviceInfo.debuggerActive; } /* Returns a pointer to the index-th non-empty entry of engineGroups, counting from 0 and skipping empty groups in container order; returns nullptr when there are not that many non-empty groups. NOTE(review): the element type of std::vector appears to have been stripped from this text - confirm against the original commit. */ +const std::vector *Device::getNonEmptyEngineGroup(size_t index) const { + auto nonEmptyGroupIndex = 0u; + for (auto groupIndex = 0u; groupIndex < engineGroups.size(); groupIndex++) { + const std::vector *currentGroup = &engineGroups[groupIndex]; + if (currentGroup->empty()) { + continue; + } + /* nonEmptyGroupIndex advances only for groups that passed the empty() check above */ + if (index == nonEmptyGroupIndex) { + return currentGroup; + } + + nonEmptyGroupIndex++; + } + return nullptr; +} + EngineControl &Device::getEngine(aub_stream::EngineType engineType, bool lowPriority, bool internalUsage) { for (auto &engine : engines) { if (engine.osContext->getEngineType() == engineType && diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 6b3c4af7c0..df289d6987 100644 --- a/shared/source/device/device.h +++ 
b/shared/source/device/device.h @@ -49,6 +49,7 @@ class Device : public ReferenceTrackedObject { std::vector> &getEngineGroups() { return this->engineGroups; } + const std::vector *getNonEmptyEngineGroup(size_t index) const; EngineControl &getEngine(uint32_t index); EngineControl &getDefaultEngine(); EngineControl &getInternalEngine(); diff --git a/shared/source/helpers/definitions/engine_group_types.h b/shared/source/helpers/definitions/engine_group_types.h index 0713a13e85..3e211a9328 100644 --- a/shared/source/helpers/definitions/engine_group_types.h +++ b/shared/source/helpers/definitions/engine_group_types.h @@ -5,6 +5,8 @@ * */ +#pragma once + #include namespace NEO {