diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 47c2287e15..ba1d581f78 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,7 +29,6 @@ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/context/driver_diagnostics.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/gtpin/gtpin_notify.h" @@ -583,8 +582,6 @@ cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue commandQueue) { TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; } - // if host queue not found - try to query device queue - retainQueue(commandQueue, retVal); TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; @@ -601,8 +598,6 @@ cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue commandQueue) { TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; } - // if host queue not found - try to query device queue - releaseQueue(commandQueue, retVal); TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; @@ -628,7 +623,6 @@ cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue commandQueue, TRACING_EXIT(clGetCommandQueueInfo, &retVal); return retVal; } - getQueueInfo(commandQueue, paramName, paramValueSize, paramValue, paramValueSizeRet, retVal); TRACING_EXIT(clGetCommandQueueInfo, &retVal); return retVal; @@ -5155,8 +5149,6 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte return commandQueue; } - auto minimumCreateDeviceQueueFlags = static_cast(CL_QUEUE_ON_DEVICE | - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); auto tokenValue = properties ? *properties : 0; auto propertiesAddress = properties; @@ -5199,12 +5191,6 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } - } else if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { - if (pContext->getDefaultDeviceQueue()) { - err.set(CL_OUT_OF_RESOURCES); - TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); - return commandQueue; - } } if (getCmdQueueProperties(properties, CL_QUEUE_SIZE) > maxOnDeviceQueueSize) { @@ -5251,30 +5237,18 @@ cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context conte TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } - - auto maskedFlags = commandQueueProperties & minimumCreateDeviceQueueFlags; - - if (maskedFlags == minimumCreateDeviceQueueFlags) { - commandQueue = DeviceQueue::create( - pContext, - pDevice, - *properties, - retVal); - - } else { - commandQueue = CommandQueue::create( - pContext, - pDevice, - properties, - false, - retVal); - if (pContext->isProvidingPerformanceHints()) { - pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); - if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { - pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); - if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { - pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); - } + commandQueue = CommandQueue::create( + pContext, + pDevice, + properties, + false, + retVal); + if (pContext->isProvidingPerformanceHints()) { + pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); + if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { + pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); + if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { + pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); } } } @@ -5506,23 +5480,7 @@ cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, return retVal; } - auto pDeviceQueue = castToObject(static_cast<_device_queue *>(commandQueue)); - - if (!pDeviceQueue) { - retVal = CL_INVALID_COMMAND_QUEUE; - TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); - return retVal; - } - - if (&pDeviceQueue->getContext() != pContext) { - retVal = CL_INVALID_COMMAND_QUEUE; - TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); - return retVal; - } - - pContext->setDefaultDeviceQueue(pDeviceQueue); - - retVal = CL_SUCCESS; + retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } diff --git a/opencl/source/api/cl_types.h b/opencl/source/api/cl_types.h index 7bcaf40418..fd9bcbe9f9 100644 --- a/opencl/source/api/cl_types.h +++ b/opencl/source/api/cl_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ struct _cl_command_queue : public ClDispatch { // device_queue is a type used internally struct _device_queue : public _cl_command_queue { }; -typedef _device_queue *device_queue; struct _cl_context : public ClDispatch { bool isSharedContext = false; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index c0968af72e..01a04665c0 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 60c6687d7e..b161b13f43 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -20,7 +20,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/get_info_status_mapper.h" @@ -134,14 +133,6 @@ uint32_t Context::getMaxRootDeviceIndex() const { return maxRootDeviceIndex; } -DeviceQueue *Context::getDefaultDeviceQueue() { - return defaultDeviceQueue; -} - -void Context::setDefaultDeviceQueue(DeviceQueue *queue) { - defaultDeviceQueue = queue; -} - CommandQueue *Context::getSpecialQueue(uint32_t rootDeviceIndex) { return specialQueues[rootDeviceIndex]; } diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index fcf07f23d4..77a31fa961 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -30,7 +30,6 @@ class AsyncEventsHandler; struct BuiltInKernel; class CommandQueue; class Device; -class DeviceQueue; class MemObj; class MemoryManager; class SharingFunctions; @@ -117,9 +116,6 @@ class Context : public BaseObject<_cl_context> { uint32_t getMaxRootDeviceIndex() const; - DeviceQueue *getDefaultDeviceQueue(); - void setDefaultDeviceQueue(DeviceQueue *queue); - CommandQueue *getSpecialQueue(uint32_t rootDeviceIndex); void setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex); void overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex); @@ -218,7 +214,6 @@ class Context : public BaseObject<_cl_context> { SVMAllocsManager *svmAllocsManager = nullptr; MapOperationsStorage mapOperationsStorage = {}; StackVec specialQueues; - DeviceQueue *defaultDeviceQueue = nullptr; DriverDiagnostics *driverDiagnostics = nullptr; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); diff --git a/opencl/source/device_queue/CMakeLists.txt b/opencl/source/device_queue/CMakeLists.txt deleted file mode 100644 index a53e591413..0000000000 --- a/opencl/source/device_queue/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# -# Copyright (C) 2018-2022 Intel Corporation -# -# SPDX-License-Identifier: MIT -# - -set(RUNTIME_SRCS_DEVICE_QUEUE - ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h -) -target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE}) -set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE}) -add_subdirectories() diff --git a/opencl/source/device_queue/device_queue.cpp b/opencl/source/device_queue/device_queue.cpp deleted file mode 100644 index ca2b63615d..0000000000 --- a/opencl/source/device_queue/device_queue.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2018-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "opencl/source/device_queue/device_queue.h" - -#include "shared/source/helpers/hw_helper.h" -#include "shared/source/memory_manager/memory_manager.h" - -#include "opencl/source/cl_device/cl_device.h" -#include "opencl/source/context/context.h" -#include "opencl/source/helpers/dispatch_info.h" -#include "opencl/source/helpers/queue_helpers.h" - -namespace NEO { -DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE] = {}; - -const uint32_t DeviceQueue::numberOfDeviceEnqueues = 128; - -DeviceQueue::DeviceQueue(Context *context, - ClDevice *device, - cl_queue_properties &properties) : DeviceQueue() { - this->context = context; - this->device = device; - - if (context) { - context->incRefInternal(); - } - - commandQueueProperties = getCmdQueueProperties(&properties, CL_QUEUE_PROPERTIES); - queueSize = getCmdQueueProperties(&properties, CL_QUEUE_SIZE); - - if (queueSize == 0) { - queueSize = device->getDeviceInfo().queueOnDevicePreferredSize; - } - - storeProperties(&properties); - allocateResources(); - initDeviceQueue(); -} - -DeviceQueue *DeviceQueue::create(Context *context, ClDevice *device, - const cl_queue_properties &properties, - cl_int &errcodeRet) { - errcodeRet = CL_SUCCESS; - DeviceQueue *deviceQueue = context->getDefaultDeviceQueue(); - - auto isDefaultDeviceQueue = getCmdQueueProperties(&properties) & - static_cast(CL_QUEUE_ON_DEVICE_DEFAULT); - if (isDefaultDeviceQueue && deviceQueue) { - deviceQueue->retain(); - return deviceQueue; - } - - auto funcCreate = deviceQueueFactory[device->getRenderCoreFamily()]; - DEBUG_BREAK_IF(nullptr == funcCreate); - deviceQueue = funcCreate(context, device, const_cast(properties)); - - context->setDefaultDeviceQueue(deviceQueue); - - return deviceQueue; -} - -DeviceQueue::~DeviceQueue() { - - for (uint32_t i = 0; i < IndirectHeap::NUM_TYPES; i++) { - if (heaps[i]) - delete heaps[i]; - } - - if (queueBuffer) - device->getMemoryManager()->freeGraphicsMemory(queueBuffer); - if (eventPoolBuffer) - device->getMemoryManager()->freeGraphicsMemory(eventPoolBuffer); - if (slbBuffer) - device->getMemoryManager()->freeGraphicsMemory(slbBuffer); - if (stackBuffer) - device->getMemoryManager()->freeGraphicsMemory(stackBuffer); - if (queueStorageBuffer) - device->getMemoryManager()->freeGraphicsMemory(queueStorageBuffer); - if (dshBuffer) - device->getMemoryManager()->freeGraphicsMemory(dshBuffer); - if (debugQueue) - device->getMemoryManager()->freeGraphicsMemory(debugQueue); - if (context) { - context->setDefaultDeviceQueue(nullptr); - context->decRefInternal(); - } -} - -Device &DeviceQueue::getDevice() { - return device->getDevice(); -} - -ClDevice *DeviceQueue::getClDevice() const { - return device; -} - -cl_int DeviceQueue::getCommandQueueInfo(cl_command_queue_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet); -} - -void DeviceQueue::storeProperties(const cl_queue_properties *properties) { - if (properties) { - for (size_t i = 0; properties[i] != 0; i += 2) { - propertiesVector.push_back(properties[i]); - propertiesVector.push_back(properties[i + 1]); - } - propertiesVector.push_back(0); - } -} - -void DeviceQueue::allocateResources() { - auto &caps = device->getDeviceInfo(); - - uint32_t alignedQueueSize = alignUp(queueSize, MemoryConstants::pageSize); - auto rootDeviceIndex = device->getRootDeviceIndex(); - auto deviceBitfield = device->getDeviceBitfield(); - queueBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, alignedQueueSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - - auto eventPoolBufferSize = static_cast(caps.maxOnDeviceEvents) * sizeof(IGIL_DeviceEvent) + sizeof(IGIL_EventPool); - eventPoolBufferSize = alignUp(eventPoolBufferSize, MemoryConstants::pageSize); - eventPoolBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, eventPoolBufferSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - - auto maxEnqueue = static_cast(alignedQueueSize) / sizeof(IGIL_CommandHeader); - auto expectedStackSize = maxEnqueue * sizeof(uint32_t) * 3; // 3 full loads of commands - expectedStackSize = alignUp(expectedStackSize, MemoryConstants::pageSize); - stackBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, expectedStackSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - memset(stackBuffer->getUnderlyingBuffer(), 0, stackBuffer->getUnderlyingBufferSize()); - - auto queueStorageSize = alignedQueueSize * 2; // place for 2 full loads of queue_t - queueStorageSize = alignUp(queueStorageSize, MemoryConstants::pageSize); - queueStorageBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, queueStorageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - memset(queueStorageBuffer->getUnderlyingBuffer(), 0, queueStorageBuffer->getUnderlyingBufferSize()); - - auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); - const size_t IDTSize = numberOfIDTables * interfaceDescriptorEntries * hwHelper.getInterfaceDescriptorDataSize(); - - // Additional padding of PAGE_SIZE for PageFaults just after DSH to satisfy hw requirements - auto dshSize = (PARALLEL_SCHEDULER_HW_GROUPS + 2) * MAX_DSH_SIZE_PER_ENQUEUE * 8 + IDTSize + colorCalcStateSize + MemoryConstants::pageSize; - dshSize = alignUp(dshSize, MemoryConstants::pageSize); - dshBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dshSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - - debugQueue = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, deviceBitfield}); - debugData = (DebugDataBuffer *)debugQueue->getUnderlyingBuffer(); - memset(debugQueue->getUnderlyingBuffer(), 0, debugQueue->getUnderlyingBufferSize()); -} - -void DeviceQueue::initDeviceQueue() { - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - auto &caps = device->getDeviceInfo(); - - memset(queueBuffer->getUnderlyingBuffer(), 0x0, queueBuffer->getUnderlyingBufferSize()); - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1; - igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; - igilCmdQueue->m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); - igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER; - - auto igilEventPool = reinterpret_cast(eventPoolBuffer->getUnderlyingBuffer()); - memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize()); - igilEventPool->m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); - igilEventPool->m_size = caps.maxOnDeviceEvents; -} - -void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, - uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNodeBase *hwTimeStamp, bool isCcsUsed) { - setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount, isCcsUsed); - addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount); -} - -void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) { - return; -} - -void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) { - return; -} - -void DeviceQueue::resetDeviceQueue() { - return; -} - -IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) { - return nullptr; -} -} // namespace NEO diff --git a/opencl/source/device_queue/device_queue.h b/opencl/source/device_queue/device_queue.h deleted file mode 100644 index c2f2ed5f04..0000000000 --- a/opencl/source/device_queue/device_queue.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2018-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "shared/source/helpers/hw_info.h" -#include "shared/source/indirect_heap/indirect_heap.h" -#include "shared/source/memory_manager/graphics_allocation.h" - -#include "opencl/source/api/cl_types.h" -#include "opencl/source/execution_model/device_enqueue.h" -#include "opencl/source/helpers/base_object.h" - -namespace NEO { -class ClDevice; -class CommandQueue; -class Context; -class Device; -class Kernel; -class Event; -struct MultiDispatchInfo; -class HwTimeStamps; -class TagNodeBase; - -template <> -struct OpenCLObjectMapper<_device_queue> { - typedef class DeviceQueue DerivedType; -}; - -class DeviceQueue : public BaseObject<_device_queue> { - public: - static const cl_ulong objectMagic = 0x1734547890087154LL; - - DeviceQueue() { - for (uint32_t i = 0; i < IndirectHeap::NUM_TYPES; i++) { - heaps[i] = nullptr; - } - offsetDsh = 0; - } - DeviceQueue(Context *context, ClDevice *device, cl_queue_properties &properties); - ~DeviceQueue() override; - - Device &getDevice(); - ClDevice *getClDevice() const; - Context &getContext() { return *context; } - cl_uint getQueueSize() { return queueSize; } - cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; } - const std::vector &getPropertiesVector() const { return propertiesVector; } - GraphicsAllocation *getQueueBuffer() { return queueBuffer; } - GraphicsAllocation *getEventPoolBuffer() { return eventPoolBuffer; } - GraphicsAllocation *getSlbBuffer() { return slbBuffer; } - GraphicsAllocation *getStackBuffer() { return stackBuffer; } - GraphicsAllocation *getQueueStorageBuffer() { return queueStorageBuffer; } - GraphicsAllocation *getDshBuffer() { return dshBuffer; } - GraphicsAllocation *getDebugQueue() { return debugQueue; } - - bool isProfilingEnabled() { - return !!(commandQueueProperties & CL_QUEUE_PROFILING_ENABLE); - } - - static DeviceQueue *create(Context *context, - ClDevice *device, - const cl_queue_properties &properties, - cl_int &errcodeRet); - - cl_int getCommandQueueInfo(cl_command_queue_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet); - - void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNodeBase *hwTimeStamp, bool isCcsUsed); - - virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed); - virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount); - - MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() { - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - auto igilCriticalSection = const_cast(&igilCmdQueue->m_controls.m_CriticalSection); - return *igilCriticalSection == ExecutionModelCriticalSection::Free; - } - - virtual void resetDeviceQueue(); - virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type); - - void acquireEMCriticalSection() { - if (DebugManager.flags.EnableNullHardware.get()) { - return; - } - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Taken; - } - - uint32_t getDshOffset() const { - return offsetDsh; - } - - enum ExecutionModelCriticalSection { - Free = 0, - Taken = 1 - }; - - static const uint32_t numberOfIDTables = 2; - static const uint32_t interfaceDescriptorEntries = 64; - static const uint32_t colorCalcStateSize = 192; - static const uint32_t schedulerIDIndex = 62; - static const uint32_t numberOfDeviceEnqueues; - - protected: - void storeProperties(const cl_queue_properties *properties); - void allocateResources(); - void initDeviceQueue(); - - Context *context = nullptr; - ClDevice *device = nullptr; - cl_command_queue_properties commandQueueProperties = 0; - std::vector propertiesVector; - cl_uint queueSize = 0; - - GraphicsAllocation *queueBuffer = nullptr; - GraphicsAllocation *eventPoolBuffer = nullptr; - GraphicsAllocation *slbBuffer = nullptr; - GraphicsAllocation *stackBuffer = nullptr; - GraphicsAllocation *queueStorageBuffer = nullptr; - GraphicsAllocation *dshBuffer = nullptr; - GraphicsAllocation *debugQueue = nullptr; - - DebugDataBuffer *debugData = nullptr; - - IndirectHeap *heaps[IndirectHeap::NUM_TYPES]; - uint32_t offsetDsh; -}; - -typedef DeviceQueue *(*DeviceQueueCreateFunc)( - Context *context, ClDevice *device, cl_queue_properties &properties); -} // namespace NEO diff --git a/opencl/source/device_queue/device_queue_hw_base.inl b/opencl/source/device_queue/device_queue_hw_base.inl deleted file mode 100644 index bbbe0c39ec..0000000000 --- a/opencl/source/device_queue/device_queue_hw_base.inl +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2019-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "shared/source/helpers/hw_helper.h" -#include "shared/source/helpers/pipe_control_args.h" -#include "shared/source/helpers/preamble.h" -#include "shared/source/helpers/string.h" -#include "shared/source/memory_manager/memory_manager.h" -#include "shared/source/utilities/tag_allocator.h" - -#include "opencl/source/command_queue/gpgpu_walker.h" -#include "opencl/source/device_queue/device_queue_hw.h" -#include "opencl/source/helpers/hardware_commands_helper.h" - -namespace NEO { -template -void DeviceQueueHw::allocateSlbBuffer() { - auto slbSize = getMinimumSlbSize() + getWaCommandsSize(); - slbSize *= 128; //num of enqueues - slbSize += sizeof(MI_BATCH_BUFFER_START); - slbSize = alignUp(slbSize, MemoryConstants::pageSize); - slbSize += DeviceQueueHw::getExecutionModelCleanupSectionSize(); - slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction - slbSize = alignUp(slbSize, MemoryConstants::pageSize); - - slbBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), slbSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, device->getDeviceBitfield()}); -} - -template -void DeviceQueueHw::resetDeviceQueue() { - auto &caps = device->getDeviceInfo(); - auto igilEventPool = reinterpret_cast(eventPoolBuffer->getUnderlyingBuffer()); - - memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize()); - igilEventPool->m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); - igilEventPool->m_size = caps.maxOnDeviceEvents; - - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - igilQueue = igilCmdQueue; - - igilCmdQueue->m_controls.m_StackSize = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - igilCmdQueue->m_controls.m_StackTop = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - igilCmdQueue->m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT; - igilCmdQueue->m_controls.m_IDTAfterFirstPhase = 1; - igilCmdQueue->m_controls.m_CurrentIDToffset = 1; - igilCmdQueue->m_controls.m_PreviousStorageTop = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); - igilCmdQueue->m_controls.m_PreviousStackTop = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - igilCmdQueue->m_controls.m_DebugNextBlockID = 0xFFFFFFFF; - igilCmdQueue->m_controls.m_QstorageSize = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); - igilCmdQueue->m_controls.m_QstorageTop = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); - igilCmdQueue->m_controls.m_IsProfilingEnabled = static_cast(isProfilingEnabled()); - igilCmdQueue->m_controls.m_IsSimulation = static_cast(device->isSimulation()); - - igilCmdQueue->m_controls.m_LastScheduleEventNumber = 0; - igilCmdQueue->m_controls.m_PreviousNumberOfQueues = 0; - igilCmdQueue->m_controls.m_EnqueueMarkerScheduled = 0; - igilCmdQueue->m_controls.m_SecondLevelBatchOffset = 0; - igilCmdQueue->m_controls.m_TotalNumberOfQueues = 0; - igilCmdQueue->m_controls.m_EventTimestampAddress = 0; - igilCmdQueue->m_controls.m_ErrorCode = 0; - igilCmdQueue->m_controls.m_CurrentScheduleEventNumber = 0; - igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder = 0x00; - igilCmdQueue->m_controls.m_DebugNextBlockGWS = 0; - - // set first stack element in surface at value "1", it protects Scheduler in corner case when StackTop is empty after Child execution - auto stack = static_cast(stackBuffer->getUnderlyingBuffer()); - stack += ((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - *stack = 1; - - igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; - igilCmdQueue->m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); - igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER; - - igilCmdQueue->m_controls.m_SchedulerEarlyReturn = DebugManager.flags.SchedulerSimulationReturnInstance.get(); - igilCmdQueue->m_controls.m_SchedulerEarlyReturnCounter = 0; - - buildSlbDummyCommands(); - - igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1; - - igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Free; - - resetDSH(); -} - -template -void DeviceQueueHw::initPipeControl(PIPE_CONTROL *pc) { - auto cmd = GfxFamily::cmdInitPipeControl; - cmd.setStateCacheInvalidationEnable(0x1); - cmd.setDcFlushEnable(true); - cmd.setPipeControlFlushEnable(true); - cmd.setTextureCacheInvalidationEnable(true); - cmd.setCommandStreamerStallEnable(true); - - *pc = cmd; -} - -template -void DeviceQueueHw::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) { - // CleanUp Section - auto offset = slbCS.getUsed(); - auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset; - slbCS.getSpace(alignmentSize); - offset = slbCS.getUsed(); - - igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed()); - GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true); - - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - - if (hwTimeStamp != nullptr) { - uint64_t timeStampAddress = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS); - igilQueue->m_controls.m_EventTimestampAddress = timeStampAddress; - - addProfilingEndCmds(timeStampAddress); - - //enable preemption - addLriCmd(false); - } - - uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection; - PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - slbCS, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - criticalSectionAddress, - ExecutionModelCriticalSection::Free, - device->getHardwareInfo(), - args); - - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - slbCS, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - tagAddress, - taskCount, - device->getHardwareInfo(), - args); - - addMediaStateClearCmds(); - - auto pBBE = slbCS.getSpaceForCmd(); - *pBBE = GfxFamily::cmdInitBatchBufferEnd; - - igilQueue->m_controls.m_CleanupSectionSize = (uint32_t)(slbCS.getUsed() - offset); -} - -template -void DeviceQueueHw::resetDSH() { - if (heaps[IndirectHeap::DYNAMIC_STATE]) { - heaps[IndirectHeap::DYNAMIC_STATE]->replaceBuffer(heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase(), heaps[IndirectHeap::DYNAMIC_STATE]->getMaxAvailableSpace()); - heaps[IndirectHeap::DYNAMIC_STATE]->getSpace(colorCalcStateSize); - } -} - -template -IndirectHeap *DeviceQueueHw::getIndirectHeap(IndirectHeap::Type type) { - UNRECOVERABLE_IF(type != IndirectHeap::DYNAMIC_STATE); - - if (!heaps[type]) { - heaps[type] = new IndirectHeap(dshBuffer); - // get space for colorCalc and 2 ID tables at the beginning - heaps[type]->getSpace(colorCalcStateSize); - } - return heaps[type]; -} - -template -size_t DeviceQueueHw::getCSPrefetchSize() { - return 512; -} - -template -void DeviceQueueHw::addLriCmd(bool setArbCheck) { - // CTXT_PREMP_DBG offset - constexpr uint32_t registerAddress = 0x2248u; - uint32_t value = 0u; - if (setArbCheck) { - // set only bit 8 (Preempt On MI_ARB_CHK Only) - value = 0x00000100; - } - - LriHelper::program(&slbCS, - registerAddress, - value, - false); -} - -template -size_t DeviceQueueHw::getExecutionModelCleanupSectionSize() { - size_t totalSize = 0; - totalSize += sizeof(PIPE_CONTROL) + - 2 * sizeof(MI_LOAD_REGISTER_REG) + - sizeof(MI_LOAD_REGISTER_IMM) + - sizeof(PIPE_CONTROL) + - sizeof(MI_MATH) + - NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE); - - totalSize += getProfilingEndCmdsSize(); - totalSize += getMediaStateClearCmdsSize(); - - totalSize += 4 * sizeof(PIPE_CONTROL); - totalSize += sizeof(MI_BATCH_BUFFER_END); - return totalSize; -} - -template -size_t DeviceQueueHw::getProfilingEndCmdsSize() { - size_t size = 0; - size += sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM); - size += sizeof(MI_LOAD_REGISTER_IMM); - return size; -} - -template -void DeviceQueueHw::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {} - -template -uint64_t DeviceQueueHw::getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed) { - auto blockAllocation = blockInfo->getGraphicsAllocation(); - DEBUG_BREAK_IF(!blockAllocation); - - auto blockKernelStartPointer = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu; - - auto &hardwareInfo = device.getHardwareInfo(); - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - - if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - blockKernelStartPointer += blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP; - } - return blockKernelStartPointer; -} - -} // namespace NEO diff --git a/opencl/source/device_queue/device_queue_hw_skl_and_later.inl b/opencl/source/device_queue/device_queue_hw_skl_and_later.inl deleted file mode 100644 index 429f5119fe..0000000000 --- a/opencl/source/device_queue/device_queue_hw_skl_and_later.inl +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (C) 2019-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_container/command_encoder.h" -#include "shared/source/command_stream/stream_properties.h" - -#include "opencl/source/cl_device/cl_device.h" -#include "opencl/source/device_queue/device_queue_hw_base.inl" -#include "opencl/source/program/block_kernel_manager.h" - -namespace NEO { - -template -size_t DeviceQueueHw::getMinimumSlbSize() { - using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; - - return sizeof(MEDIA_STATE_FLUSH) + - sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + - sizeof(PIPE_CONTROL) + - sizeof(GPGPU_WALKER) + - sizeof(MEDIA_STATE_FLUSH) + - sizeof(PIPE_CONTROL) + - DeviceQueueHw::getCSPrefetchSize(); -} - -template -void DeviceQueueHw::buildSlbDummyCommands() { - using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; - - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - auto slbEndOffset = igilCmdQueue->m_controls.m_SLBENDoffsetInBytes; - size_t commandsSize = getMinimumSlbSize() + getWaCommandsSize(); - size_t numEnqueues = numberOfDeviceEnqueues; - - // buildSlbDummyCommands is called from resetDeviceQueue() - reset slbCS each time - slbCS.replaceBuffer(slbBuffer->getUnderlyingBuffer(), slbBuffer->getUnderlyingBufferSize()); - - if (slbEndOffset >= 0) { - DEBUG_BREAK_IF(slbEndOffset % commandsSize != 0); - //We always overwrite at most one enqueue space with BB_START command pointing to cleanup section - //if SLBENDoffset is the at the end then BB_START added after scheduler did not corrupt anything so no need to regenerate - numEnqueues = (slbEndOffset == static_cast(commandsSize)) ? 0 : 1; - slbCS.getSpace(slbEndOffset); - } - - for (size_t i = 0; i < numEnqueues; i++) { - auto mediaStateFlush = slbCS.getSpaceForCmd(); - *mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; - - addArbCheckCmdWa(); - - addMiAtomicCmdWa((uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder); - - auto mediaIdLoadSpace = slbCS.getSpaceForCmd(); - auto mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; - mediaIdLoad.setInterfaceDescriptorTotalLength(2048); - - auto dataStartAddress = colorCalcStateSize; - mediaIdLoad.setInterfaceDescriptorDataStartAddress(dataStartAddress + sizeof(INTERFACE_DESCRIPTOR_DATA) * schedulerIDIndex); - *mediaIdLoadSpace = mediaIdLoad; - - addLriCmdWa(true); - - if (isProfilingEnabled()) { - addPipeControlCmdWa(); - auto pipeControl = slbCS.getSpaceForCmd(); - initPipeControl(pipeControl); - - } else { - auto noop = slbCS.getSpace(sizeof(PIPE_CONTROL)); - memset(noop, 0x0, sizeof(PIPE_CONTROL)); - addPipeControlCmdWa(true); - } - - auto gpgpuWalkerSpace = slbCS.getSpaceForCmd(); - auto gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; - gpgpuWalker.setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); - gpgpuWalker.setThreadGroupIdXDimension(1); - gpgpuWalker.setThreadGroupIdYDimension(1); - gpgpuWalker.setThreadGroupIdZDimension(1); - gpgpuWalker.setRightExecutionMask(0xFFFFFFFF); - gpgpuWalker.setBottomExecutionMask(0xFFFFFFFF); - *gpgpuWalkerSpace = gpgpuWalker; - - mediaStateFlush = slbCS.getSpaceForCmd(); - *mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; - - addArbCheckCmdWa(); - - addPipeControlCmdWa(); - - auto pipeControl2 = slbCS.getSpaceForCmd(); - initPipeControl(pipeControl2); - - addLriCmdWa(false); - - auto prefetch = slbCS.getSpace(getCSPrefetchSize()); - memset(prefetch, 0x0, getCSPrefetchSize()); - } - - // always the same BBStart position (after 128 enqueues) - auto bbStartOffset = (commandsSize * 128) - slbCS.getUsed(); - slbCS.getSpace(bbStartOffset); - - auto bbStartSpace = slbCS.getSpaceForCmd(); - auto bbStart = GfxFamily::cmdInitBatchBufferStart; - auto slbPtr = reinterpret_cast(slbBuffer->getUnderlyingBuffer()); - bbStart.setBatchBufferStartAddress(slbPtr); - *bbStartSpace = bbStart; - - igilCmdQueue->m_controls.m_CleanupSectionSize = 0; - igilQueue->m_controls.m_CleanupSectionAddress = 0; -} - -template -void DeviceQueueHw::addMediaStateClearCmds() { - typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE; - - addPipeControlCmdWa(); - - auto pipeControlSpace = slbCS.getSpaceForCmd(); - auto pipeControl = GfxFamily::cmdInitPipeControl; - pipeControl.setGenericMediaStateClear(true); - pipeControl.setCommandStreamerStallEnable(true); - addDcFlushToPipeControlWa(&pipeControl); - *pipeControlSpace = pipeControl; - - auto pVfeState = PreambleHelper::getSpaceForVfeState(&slbCS, device->getHardwareInfo(), EngineGroupType::RenderCompute); - StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeState, device->getHardwareInfo(), 0u, 0, device->getSharedDeviceInfo().maxFrontEndThreads, emptyProperties); -} - -template -size_t DeviceQueueHw::getMediaStateClearCmdsSize() { - using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; - // PC with GenreicMediaStateClear + WA PC - size_t size = 2 * sizeof(PIPE_CONTROL); - - // VFE state cmds - size += sizeof(PIPE_CONTROL); - size += sizeof(MEDIA_VFE_STATE); - return size; -} - -template -void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) { - using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; - void *pDSH = dynamicStateHeap.getCpuBase(); - // Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries. - auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2); - - // Parent's dsh is located after ColorCalcState and 2 ID tables - igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp(static_cast(parentKernel->getDynamicStateHeapSize()), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); - igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize(); - - igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart; - igilCmdQueue->m_controls.m_ParentDSHOffset = offsetDsh; - - uint32_t blockIndex = parentIDCount; - - pDSH = ptrOffset(pDSH, colorCalcStateSize); - - INTERFACE_DESCRIPTOR_DATA *pIDDestination = static_cast(pDSH); - - BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - uint32_t maxBindingTableCount = 0; - uint32_t totalBlockSSHSize = 0; - - igilCmdQueue->m_controls.m_StartBlockID = blockIndex; - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - - auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed); - - auto bindingTableCount = static_cast(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries); - maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount); - - totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - - surfaceStateHeap.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - auto btOffset = EncodeSurfaceState::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount, - pBlockInfo->heapInfo.pSsh, - pBlockInfo->heapInfo.SurfaceStateHeapSize, - bindingTableCount, - pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); - - parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast(btOffset)); - - // Determine SIMD size - uint32_t simd = pBlockInfo->getMaxSimdSize(); - - uint32_t idOffset = pBlockInfo->kernelDescriptor.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset; - const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset)); - - pIDDestination[blockIndex + i] = *pBlockID; - pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32); - pIDDestination[blockIndex + i].setKernelStartPointer(static_cast(blockKernelStartPointer)); - pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); - EncodeDispatchKernel::programBarrierEnable(pIDDestination[blockIndex + i], - pBlockInfo->kernelDescriptor.kernelAttributes.barrierCount, - device->getHardwareInfo()); - - // Set offset to sampler states, block's DHSOffset is added by scheduler - pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast(pBlockInfo->getBorderColorStateSize())); - - auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels; - auto grfSize = device->getDeviceInfo().grfSize; - auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels); - auto numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); - - // HW requires a minimum of 1 GRF of perThreadData for each thread in a thread group - // when sizeCrossThreadData != 0 - numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); - pIDDestination[blockIndex + i].setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); - } - - igilCmdQueue->m_controls.m_BTmaxSize = alignUp(maxBindingTableCount * (uint32_t)sizeof(BINDING_TABLE_STATE), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE); - igilCmdQueue->m_controls.m_BTbaseOffset = alignUp((uint32_t)surfaceStateHeap.getUsed(), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE); - igilCmdQueue->m_controls.m_CurrentSSHoffset = igilCmdQueue->m_controls.m_BTbaseOffset; -} - -} // namespace NEO diff --git a/opencl/source/execution_model/CMakeLists.txt b/opencl/source/execution_model/CMakeLists.txt deleted file mode 100644 index 30be20a336..0000000000 --- a/opencl/source/execution_model/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (C) 2018-2020 Intel Corporation -# -# SPDX-License-Identifier: MIT -# - -set(RUNTIME_SRCS_EXECUTION_MODEL - ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/device_enqueue.h -) - -target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EXECUTION_MODEL}) diff --git a/opencl/source/execution_model/device_enqueue.h b/opencl/source/execution_model/device_enqueue.h deleted file mode 100644 index 7184d979b8..0000000000 --- a/opencl/source/execution_model/device_enqueue.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once - -// Uncomment this macro to build "empty" schedulers -//#define WA_DISABLE_SCHEDULERS 1 - -#if !defined(__OPENCL_VERSION__) -#include - -typedef uint32_t uint; -typedef uint64_t ulong; -#endif - -#define OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD (4 * sizeof(uint)) -#define OCLRT_SIZEOF_MEDIA_CURBE_LOAD_DEVICE_CMD (4 * sizeof(uint)) -#define OCLRT_SIZEOF_MEDIA_STATE_FLUSH (2 * sizeof(uint)) -#define OCLRT_SIZEOF_MI_ATOMIC_CMD (11 * sizeof(uint)) -#define OCLRT_SIZEOF_MEDIA_VFE_STATE_CMD (9 * sizeof(uint)) -#define OCLRT_SIZEOF_MI_ARB_CHECK (1 * sizeof(uint)) - -#define OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET (4) -#define OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET (11) -#define OCLRT_SIZEOF_MEDIA_CURBE_LOAD_DEVICE_CMD_DWORD_OFFSET (4) -#define OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET (3) - -#define OCLRT_SIZEOF_MSFLUSH_DWORD (2) -#define OCLRT_SIZEOF_MI_ARB_CHECK_DWORD (1) -#define OCLRT_SIZEOF_MEDIA_VFE_STATE_DWORD (9) - -#define OCLRT_BATCH_BUFFER_END_CMD (83886080) - -//Constant buffer stuff -#define COMPILER_DATA_PARAMETER_GLOBAL_SURFACE (49) - -#define SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT (50) - -#define SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER_SHIFT (63) -#define SCHEDULER_DATA_PARAMETER_SAMPLER_SHIFT (51) -#define SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE (2 * SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT) - -#define CS_PREFETCH_SIZE (8 * 64) - -#define ALL_BITS_SET_DWORD_MASK (0xffffffff) -#define DWORD_SIZE_IN_BITS (32) - -#define CL_sRGB 0x10BF -#define CL_sRGBX 0x10C0 -#define CL_sRGBA 0x10C1 -#define CL_sBGRA 0x10C2 - -//scheduler currently can spawn up to 8 GPGPU_WALKERS between scheduler runs, so it needs 8 * 3 HW threads for scheduling blocks + 1 HW thread to scheduler next scheduler -//each HW group consist of 3 HW threads that are capable of scheduling 1 block - -//!!! Make sure value of this define equals MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS in DeviceEnqueueInternalTypes.h -#define PARALLEL_SCHEDULER_HW_GROUPS (8) -#define PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP (3) -#define PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 (3) -#define PARALLEL_SCHEDULER_HW_GROUPS_IN_THREADS (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP * PARALLEL_SCHEDULER_HW_GROUPS) - -#define PARALLEL_SCHEDULER_NUMBER_HW_THREADS (PARALLEL_SCHEDULER_HW_GROUPS_IN_THREADS + 1) - -//parallel scheduler 2.0 is compiled in simd8 -#define PARALLEL_SCHEDULER_COMPILATION_SIZE_20 (8) - -#define HW_GROUP_ID_SHIFT(COMPILATION_SIZE) ((COMPILATION_SIZE & 0x10) ? 4 : 3) - -#define GRF_SIZE (32) -#define SIZEOF_3GRFS (3 * GRF_SIZE) - -//estimation for dynamic payload size -#define SCHEDULER_DYNAMIC_PAYLOAD_SIZE (PARALLEL_SCHEDULER_NUMBER_HW_THREADS * SIZEOF_3GRFS) - -//assume that max DSH per walker is 9472B ( assuming registers can take up to 4KB, and max dynamic payload is around 96B * 56(HW threads) it should be fine. -#define MAX_DSH_SIZE_PER_ENQUEUE 9472 - -#define MAX_BINDING_TABLE_INDEX (253) -#define MAX_SSH_PER_KERNEL_SIZE (MAX_BINDING_TABLE_INDEX * 64) //max SSH that can be one kernel. It is 253 binding table entries multiplied by the Surface State size. - -#define OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(ArgOffset) (ArgOffset + MAX_SSH_PER_KERNEL_SIZE) -#define OCLRT_IMAGE_MAX_OBJECT_ID (MAX_SSH_PER_KERNEL_SIZE - 1) -#define OCLRT_SAMPLER_MIN_OBJECT_ID (MAX_SSH_PER_KERNEL_SIZE) - -typedef enum tagDebugDataTypes { - DBG_DEFAULT = 0, - DBG_COMMAND_QUEUE = 1, - DBG_EVENTS_UPDATE = 2, - DBG_EVENTS_NUMBER = 3, - DBG_STACK_UPDATE = 4, - DBG_BEFORE_PATCH = 5, - DBG_KERNELID = 6, - DBG_DSHOFFSET = 7, - DBG_IDOFFSET = 8, - DBG_AFTER_PATCH = 9, - DBG_UNSPECIFIED = 10, - DBG_ENQUEUES_NUMBER = 11, - DBG_LOCAL_ID, - DBG_WKG_ID, - DBG_SCHEDULER_END, - // Add here new debug enums - DBG_MAX -} DebugDataTypes; -// Struct for debugging kernels -typedef struct -{ - DebugDataTypes m_dataType; - uint m_dataSize; -} DebugDataInfo; -typedef struct -{ - enum DDBFlags { DDB_HAS_DATA_INFO = 1, - DDB_SCHEDULER_PROFILING = 2, - DDB_COMMAND_QUEUE_RAW = 4 } ddbFlags; - uint m_size; - uint m_stackTop; //index of data stack - uint m_dataInfoTop; //index of the top of DataInfo stack, this stacks grows with decrementing address - uint m_stackBottom; - uint m_dataInfoBottom; //index of the bottom of DataInfo - uint m_dataInfoSize; - uint m_flags; - - uint m_offset; //current offset indicates free place - uint m_data[100]; //buffer -} DebugDataBuffer; - -#pragma pack(push) -#pragma pack(4) -#include "DeviceEnqueueInternalTypes.h" -#pragma pack(pop) diff --git a/opencl/source/helpers/CMakeLists.txt b/opencl/source/helpers/CMakeLists.txt index bb28b36da9..d1a8afb912 100644 --- a/opencl/source/helpers/CMakeLists.txt +++ b/opencl/source/helpers/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # diff --git a/opencl/source/helpers/queue_helpers.h b/opencl/source/helpers/queue_helpers.h index 61531df1e2..1c41561e8d 100644 --- a/opencl/source/helpers/queue_helpers.h +++ b/opencl/source/helpers/queue_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/get_info_status_mapper.h" namespace NEO { @@ -22,9 +21,6 @@ inline void releaseVirtualEvent(CommandQueue &commandQueue) { } } -inline void releaseVirtualEvent(DeviceQueue &commandQueue) { -} - inline bool isCommandWithoutKernel(uint32_t commandType) { return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) || @@ -108,15 +104,7 @@ cl_int getQueueInfo(QueueType *queue, case CL_QUEUE_PROPERTIES: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getCommandQueueProperties())); break; - case CL_QUEUE_DEVICE_DEFAULT: - retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getContext().getDefaultDeviceQueue())); - break; case CL_QUEUE_SIZE: - if (std::is_same::value) { - auto devQ = reinterpret_cast(queue); - retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(devQ->getQueueSize())); - break; - } retVal = CL_INVALID_COMMAND_QUEUE; break; case CL_QUEUE_PROPERTIES_ARRAY: { diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 103a4a1ed2..37e50c5c22 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -21,7 +21,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/enqueue_common.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/source/helpers/enqueue_properties.h" diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 6643101f18..dcee1e760e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -36,8 +36,6 @@ #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/source/execution_model/device_enqueue.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/dispatch_info.h" @@ -283,9 +281,6 @@ cl_int Kernel::initialize() { } else if (arg.getTraits().typeQualifiers.pipeQual) { kernelArgHandlers[i] = &Kernel::setArgPipe; kernelArguments[i].type = PIPE_OBJ; - } else if (arg.getExtendedTypeInfo().isDeviceQueue) { - kernelArgHandlers[i] = &Kernel::setArgDevQueue; - kernelArguments[i].type = DEVICE_QUEUE_OBJ; } else { kernelArgHandlers[i] = &Kernel::setArgBuffer; kernelArguments[i].type = BUFFER_OBJ; @@ -1730,34 +1725,6 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex, return retVal; } -cl_int Kernel::setArgDevQueue(uint32_t argIndex, - size_t argSize, - const void *argVal) { - if (argVal == nullptr) { - return CL_INVALID_ARG_VALUE; - } - - if (argSize != sizeof(cl_command_queue)) { - return CL_INVALID_ARG_SIZE; - } - - auto clDeviceQueue = *(static_cast(argVal)); - auto pDeviceQueue = castToObject(clDeviceQueue); - - if (pDeviceQueue == nullptr) { - return CL_INVALID_DEVICE_QUEUE; - } - - storeKernelArg(argIndex, DEVICE_QUEUE_OBJ, clDeviceQueue, argVal, argSize); - - const auto &argAsPtr = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex].as(); - auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), argAsPtr.stateless); - patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, - static_cast(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch())); - - return CL_SUCCESS; -} - void Kernel::setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler) { if (kernelArgHandlers.size() <= argIndex) { kernelArgHandlers.resize(argIndex + 1); @@ -1824,39 +1791,6 @@ void Kernel::provideInitializationHints() { } } -void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { - const auto &defaultQueueSurfaceAddress = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; - if (isValidOffset(defaultQueueSurfaceAddress.stateless) && crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), defaultQueueSurfaceAddress.stateless); - patchWithRequiredSize(patchLocation, defaultQueueSurfaceAddress.pointerSize, - static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); - } - if (isValidOffset(defaultQueueSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), defaultQueueSurfaceAddress.bindful); - Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), - (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); - } -} - -void Kernel::patchEventPool(DeviceQueue *devQueue) { - const auto &eventPoolSurfaceAddress = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - - if (isValidOffset(eventPoolSurfaceAddress.stateless) && crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), eventPoolSurfaceAddress.stateless); - patchWithRequiredSize(patchLocation, eventPoolSurfaceAddress.pointerSize, - static_cast(devQueue->getEventPoolBuffer()->getGpuAddressToPatch())); - } - - if (isValidOffset(eventPoolSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), eventPoolSurfaceAddress.bindful); - auto eventPoolBuffer = devQueue->getEventPoolBuffer(); - Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(), - (void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0, - kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); - } -} - bool Kernel::usesSyncBuffer() const { return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesSyncBuffer; } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index bae569733c..eadd382273 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -23,7 +23,6 @@ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" @@ -34,6 +33,7 @@ namespace NEO { struct CompletionStamp; class Buffer; +class CommandQueue; class CommandStreamReceiver; class GraphicsAllocation; class ImageTransformer; @@ -221,8 +221,6 @@ class Kernel : public ReferenceTrackedObject { return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; } - void patchDefaultDeviceQueue(DeviceQueue *devQueue); - void patchEventPool(DeviceQueue *devQueue); bool usesSyncBuffer() const; void patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOffset); void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless); @@ -271,10 +269,6 @@ class Kernel : public ReferenceTrackedObject { size_t argSize, const void *argVal); - cl_int setArgDevQueue(uint32_t argIndex, - size_t argSize, - const void *argVal); - void storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, diff --git a/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp b/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp index 0b23029ee8..707399d484 100644 --- a/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp +++ b/opencl/test/unit_test/api/cl_create_command_queue_with_properties_tests.cpp @@ -14,7 +14,6 @@ #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" @@ -78,13 +77,6 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, GivenPropertiesWhenCreatingComma CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_MED_KHR, 0}; - const auto minimumCreateDeviceQueueFlags = static_cast(CL_QUEUE_ON_DEVICE | - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); - const auto deviceQueueShouldBeCreated = (commandQueueProperties & minimumCreateDeviceQueueFlags) == minimumCreateDeviceQueueFlags; - if (deviceQueueShouldBeCreated && !castToObject(testedClDevice)->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { - return; - } - bool queueOnDeviceUsed = false; bool priorityHintsUsed = false; bool throttleHintsUsed = false; @@ -131,17 +123,9 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, GivenPropertiesWhenCreatingComma EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, cmdQ); } - auto deviceQ = static_cast(cmdQ); - auto deviceQueueObj = castToObject(deviceQ); auto commandQueueObj = castToObject(cmdQ); - if (deviceQueueShouldBeCreated) { // created device queue - ASSERT_NE(deviceQueueObj, nullptr); - ASSERT_EQ(commandQueueObj, nullptr); - } else { // created host queue - ASSERT_EQ(deviceQueueObj, nullptr); - ASSERT_NE(commandQueueObj, nullptr); - } + ASSERT_NE(commandQueueObj, nullptr); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); @@ -154,8 +138,6 @@ TEST_P(clCreateCommandQueueWithPropertiesTests, GivenPropertiesWhenCreatingComma static cl_command_queue_properties commandQueueProperties[] = { 0, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_PROFILING_ENABLE}; static cl_uint queueSizes[] = diff --git a/opencl/test/unit_test/api/cl_get_device_info_tests.inl b/opencl/test/unit_test/api/cl_get_device_info_tests.inl index 45f6527d58..459d996bcb 100644 --- a/opencl/test/unit_test/api/cl_get_device_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_device_info_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -115,7 +115,7 @@ TEST_F(clGetDeviceInfoTests, givenOpenCLDeviceWhenAskedForSupportedSvmTypeThenCo TEST(clGetDeviceGlobalMemSizeTests, givenDebugFlagForGlobalMemSizePercentWhenAskedForGlobalMemSizeThenAdjustedGlobalMemSizeIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(100u); - ulong globalMemSize100percent = 0u; + uint64_t globalMemSize100percent = 0u; auto hwInfo = *defaultHwInfo; @@ -124,14 +124,14 @@ TEST(clGetDeviceGlobalMemSizeTests, givenDebugFlagForGlobalMemSizePercentWhenAsk auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_GLOBAL_MEM_SIZE, - sizeof(ulong), + sizeof(uint64_t), &globalMemSize100percent, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(globalMemSize100percent, 0u); DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(50u); - ulong globalMemSize50percent = 0u; + uint64_t globalMemSize50percent = 0u; hwInfo = *defaultHwInfo; @@ -140,7 +140,7 @@ TEST(clGetDeviceGlobalMemSizeTests, givenDebugFlagForGlobalMemSizePercentWhenAsk retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_GLOBAL_MEM_SIZE, - sizeof(ulong), + sizeof(uint64_t), &globalMemSize50percent, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); diff --git a/opencl/test/unit_test/api/cl_release_command_queue_tests.inl b/opencl/test/unit_test/api/cl_release_command_queue_tests.inl index 7f9ada062d..87ba43de27 100644 --- a/opencl/test/unit_test/api/cl_release_command_queue_tests.inl +++ b/opencl/test/unit_test/api/cl_release_command_queue_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -16,37 +16,13 @@ #include using namespace NEO; -namespace DeviceHostQueue { -typedef ::testing::Types QueueTypes; -template -class clReleaseCommandQueueTypeTests : public DeviceHostQueueFixture {}; +namespace ULT { -TYPED_TEST_CASE(clReleaseCommandQueueTypeTests, QueueTypes); - -TYPED_TEST(clReleaseCommandQueueTypeTests, GivenValidCmdQueueWhenReleasingCmdQueueThenSucessIsReturned) { - if (std::is_same::value && !this->pDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { - return; - } - - using BaseType = typename TypeParam::BaseType; - - auto queue = this->createClQueue(); - ASSERT_EQ(CL_SUCCESS, this->retVal); - auto qObject = castToObject(static_cast(queue)); - ASSERT_NE(qObject, nullptr); - - this->retVal = clReleaseCommandQueue(queue); - EXPECT_EQ(CL_SUCCESS, this->retVal); -} - -TEST(clReleaseCommandQueueTypeTests, GivenNullCmdQueueWhenReleasingCmdQueueThenClInvalidCommandQueueErrorIsReturned) { +TEST(clReleaseCommandQueueTest, GivenNullCmdQueueWhenReleasingCmdQueueThenClInvalidCommandQueueErrorIsReturned) { auto retVal = clReleaseCommandQueue(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } -} // namespace DeviceHostQueue - -namespace ULT { typedef api_tests clReleaseCommandQueueTests; diff --git a/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl b/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl index c99b142768..4ef3469a2a 100644 --- a/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl +++ b/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,7 +12,7 @@ using namespace NEO; namespace DeviceHostQueue { -typedef ::testing::Types QueueTypes; +typedef ::testing::Types QueueTypes; template class clRetainReleaseCommandQueueTests : public DeviceHostQueueFixture {}; @@ -20,10 +20,6 @@ class clRetainReleaseCommandQueueTests : public DeviceHostQueueFixture {}; TYPED_TEST_CASE(clRetainReleaseCommandQueueTests, QueueTypes); TYPED_TEST(clRetainReleaseCommandQueueTests, GivenValidCommandQueueWhenRetainingAndReleasingThenReferenceCountIsUpdatedCorrectly) { - if (std::is_same::value && !this->pDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { - return; - } - using BaseType = typename TypeParam::BaseType; auto queue = this->createClQueue(); diff --git a/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl b/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl index af0bbddb10..4e3c5cec97 100644 --- a/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl +++ b/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl @@ -9,7 +9,6 @@ #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue.h" #include "cl_api_tests.h" diff --git a/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp b/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp index c3d6a2d2e3..e9c786b880 100644 --- a/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -93,20 +93,6 @@ TEST_P(GetCommandQueueInfoTest, givenNonDeviceQueueWhenQueryingQueueSizeThenInva EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } -TEST_P(GetCommandQueueInfoTest, GivenClQueueDeviceDefaultWhenGettingCommandQueueInfoThenSuccessIsReturned) { - cl_command_queue commandQueueReturned = nullptr; - - auto retVal = pCmdQ->getCommandQueueInfo( - CL_QUEUE_DEVICE_DEFAULT, - sizeof(commandQueueReturned), - &commandQueueReturned, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - - // host queue can't be default device queue - EXPECT_NE(pCmdQ, commandQueueReturned); -} - TEST_P(GetCommandQueueInfoTest, GivenInvalidParameterWhenGettingCommandQueueInfoThenInvalidValueIsReturned) { cl_uint parameterReturned = 0; cl_command_queue_info invalidParameter = 0xdeadbeef; diff --git a/opencl/test/unit_test/context/context_tests.cpp b/opencl/test/unit_test/context/context_tests.cpp index 3f62e7fafe..8ad8a31c99 100644 --- a/opencl/test/unit_test/context/context_tests.cpp +++ b/opencl/test/unit_test/context/context_tests.cpp @@ -18,7 +18,6 @@ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.inl" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" diff --git a/opencl/test/unit_test/fixtures/device_host_queue_fixture.cpp b/opencl/test/unit_test/fixtures/device_host_queue_fixture.cpp index 0a50dba32f..c4a0ce2681 100644 --- a/opencl/test/unit_test/fixtures/device_host_queue_fixture.cpp +++ b/opencl/test/unit_test/fixtures/device_host_queue_fixture.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -27,77 +27,9 @@ cl_queue_properties deviceQueueProperties::allProperties[5] = { CL_QUEUE_SIZE, 128 * 1024, 0}; -template <> -cl_command_queue DeviceHostQueueFixture::create(cl_context ctx, cl_device_id device, cl_int &retVal, - cl_queue_properties properties[5]) { - cl_queue_properties qProps[5]; - memcpy(qProps, properties, 5 * sizeof(cl_queue_properties)); - qProps[0] = CL_QUEUE_PROPERTIES; - qProps[1] = qProps[1] | deviceQueueProperties::minimumProperties[1]; - return clCreateCommandQueueWithProperties(ctx, device, qProps, &retVal); -} - template <> cl_command_queue DeviceHostQueueFixture::create(cl_context ctx, cl_device_id device, cl_int &retVal, cl_queue_properties properties[5]) { return clCreateCommandQueueWithProperties(ctx, device, properties, &retVal); } - -IGIL_CommandQueue getExpectedInitIgilCmdQueue(DeviceQueue *deviceQueue) { - IGIL_CommandQueue igilCmdQueueInit; - auto queueBuffer = deviceQueue->getQueueBuffer(); - - memset(&igilCmdQueueInit, 0, sizeof(IGIL_CommandQueue)); - igilCmdQueueInit.m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; - igilCmdQueueInit.m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); - igilCmdQueueInit.m_magic = IGIL_MAGIC_NUMBER; - - igilCmdQueueInit.m_controls.m_SLBENDoffsetInBytes = -1; - return igilCmdQueueInit; -} - -IGIL_CommandQueue getExpectedgilCmdQueueAfterReset(DeviceQueue *deviceQueue) { - auto queueBuffer = deviceQueue->getQueueBuffer(); - auto stackBuffer = deviceQueue->getStackBuffer(); - auto queueStorage = deviceQueue->getQueueStorageBuffer(); - - auto deviceQueueIgilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); - IGIL_CommandQueue expectedIgilCmdQueue; - memcpy(&expectedIgilCmdQueue, deviceQueueIgilCmdQueue, sizeof(IGIL_CommandQueue)); - - expectedIgilCmdQueue.m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; - expectedIgilCmdQueue.m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); - expectedIgilCmdQueue.m_magic = IGIL_MAGIC_NUMBER; - - expectedIgilCmdQueue.m_controls.m_SLBENDoffsetInBytes = -1; - expectedIgilCmdQueue.m_controls.m_StackSize = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - expectedIgilCmdQueue.m_controls.m_StackTop = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - expectedIgilCmdQueue.m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT; - expectedIgilCmdQueue.m_controls.m_IDTAfterFirstPhase = 1; - expectedIgilCmdQueue.m_controls.m_CurrentIDToffset = 1; - expectedIgilCmdQueue.m_controls.m_PreviousStorageTop = static_cast(queueStorage->getUnderlyingBufferSize()); - expectedIgilCmdQueue.m_controls.m_PreviousStackTop = - static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); - expectedIgilCmdQueue.m_controls.m_DebugNextBlockID = 0xFFFFFFFF; - expectedIgilCmdQueue.m_controls.m_QstorageSize = static_cast(queueStorage->getUnderlyingBufferSize()); - expectedIgilCmdQueue.m_controls.m_QstorageTop = static_cast(queueStorage->getUnderlyingBufferSize()); - expectedIgilCmdQueue.m_controls.m_IsProfilingEnabled = static_cast(deviceQueue->isProfilingEnabled()); - expectedIgilCmdQueue.m_controls.m_SLBENDoffsetInBytes = -1; - expectedIgilCmdQueue.m_controls.m_IsSimulation = static_cast(deviceQueue->getDevice().isSimulation()); - - expectedIgilCmdQueue.m_controls.m_LastScheduleEventNumber = 0; - expectedIgilCmdQueue.m_controls.m_PreviousNumberOfQueues = 0; - expectedIgilCmdQueue.m_controls.m_EnqueueMarkerScheduled = 0; - expectedIgilCmdQueue.m_controls.m_SecondLevelBatchOffset = 0; - expectedIgilCmdQueue.m_controls.m_TotalNumberOfQueues = 0; - expectedIgilCmdQueue.m_controls.m_EventTimestampAddress = 0; - expectedIgilCmdQueue.m_controls.m_ErrorCode = 0; - expectedIgilCmdQueue.m_controls.m_CurrentScheduleEventNumber = 0; - expectedIgilCmdQueue.m_controls.m_DummyAtomicOperationPlaceholder = 0x00; - expectedIgilCmdQueue.m_controls.m_DebugNextBlockGWS = 0; - - return expectedIgilCmdQueue; -} } // namespace DeviceHostQueue diff --git a/opencl/test/unit_test/fixtures/device_host_queue_fixture.h b/opencl/test/unit_test/fixtures/device_host_queue_fixture.h index 77f9d47905..397320cd71 100644 --- a/opencl/test/unit_test/fixtures/device_host_queue_fixture.h +++ b/opencl/test/unit_test/fixtures/device_host_queue_fixture.h @@ -10,7 +10,6 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" @@ -24,9 +23,6 @@ struct deviceQueueProperties { static cl_queue_properties allProperties[5]; }; -IGIL_CommandQueue getExpectedInitIgilCmdQueue(DeviceQueue *deviceQueue); -IGIL_CommandQueue getExpectedgilCmdQueueAfterReset(DeviceQueue *deviceQueue); - template class DeviceHostQueueFixture : public ApiFixture<>, public ::testing::Test { diff --git a/opencl/test/unit_test/helpers/base_object_tests.cpp b/opencl/test/unit_test/helpers/base_object_tests.cpp index 2d9d91a499..dc49f5a589 100644 --- a/opencl/test/unit_test/helpers/base_object_tests.cpp +++ b/opencl/test/unit_test/helpers/base_object_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,7 +10,6 @@ #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj.h" @@ -100,8 +99,7 @@ typedef ::testing::Types< //Kernel, //Sampler //others... - MockCommandQueue, - DeviceQueue> + MockCommandQueue> BaseObjectTypes; typedef ::testing::Types< @@ -110,8 +108,7 @@ typedef ::testing::Types< Context, Program, Buffer, - MockCommandQueue, - DeviceQueue> + MockCommandQueue> BaseObjectTypesForCastInvalidMagicTest; TYPED_TEST_CASE(BaseObjectTests, BaseObjectTypes); diff --git a/opencl/test/unit_test/helpers/validator_tests.cpp b/opencl/test/unit_test/helpers/validator_tests.cpp index b665e0c781..b22c5b7612 100644 --- a/opencl/test/unit_test/helpers/validator_tests.cpp +++ b/opencl/test/unit_test/helpers/validator_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -54,7 +54,6 @@ REGISTER_TYPED_TEST_CASE_P( // Define new command types to run the parameterized tests typedef ::testing::Types< cl_command_queue, - device_queue, // internal type cl_context, cl_device_id, cl_event, diff --git a/opencl/test/unit_test/mocks/mock_context.cpp b/opencl/test/unit_test/mocks/mock_context.cpp index 18b1b2288b..1848943fb3 100644 --- a/opencl/test/unit_test/mocks/mock_context.cpp +++ b/opencl/test/unit_test/mocks/mock_context.cpp @@ -43,7 +43,6 @@ MockContext::MockContext( contextCallback = funcNotify; userData = data; memoryManager = nullptr; - defaultDeviceQueue = nullptr; driverDiagnostics = nullptr; rootDeviceIndices = {}; maxRootDeviceIndex = std::numeric_limits::max(); diff --git a/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp b/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp index a18c17ca7d..c316ec7cff 100644 --- a/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp +++ b/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,7 +7,6 @@ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" @@ -34,8 +33,7 @@ typedef ::testing::Types< //Kernel, //Sampler //others... - MockCommandQueue, - DeviceQueue> + MockCommandQueue> BaseObjectTypes; TYPED_TEST_CASE(VABaseObjectTests, BaseObjectTypes);