From a807b9a90be1b69c803c41682e09e217312e5cb2 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 24 Aug 2018 08:48:59 +0200 Subject: [PATCH] Initial implementation of Timestamp Packet write Change-Id: Ic498bcf9795f54fbb5fb5a8d07ed17fa70dc4f1a Signed-off-by: Dunajski, Bartosz --- runtime/command_queue/command_queue.h | 2 +- runtime/command_queue/enqueue_common.h | 1 + runtime/command_queue/gpgpu_walker.h | 14 ++ runtime/command_queue/gpgpu_walker.inl | 54 ++++-- runtime/helpers/CMakeLists.txt | 1 + runtime/helpers/timestamp_packet.h | 59 +++++++ runtime/memory_manager/memory_manager.cpp | 26 ++- runtime/memory_manager/memory_manager.h | 3 + runtime/os_interface/DebugVariables_base.inl | 1 + .../command_queue/command_queue_tests.cpp | 4 +- .../command_queue/dispatch_walker_tests.cpp | 27 ++- .../command_queue/drm_requirements_tests.cpp | 4 +- unit_tests/command_queue/finish_tests.cpp | 4 +- .../get_size_required_buffer_tests.cpp | 16 +- .../get_size_required_image_tests.cpp | 12 +- .../command_queue/get_size_required_tests.cpp | 6 +- unit_tests/command_queue/oom_buffer_tests.cpp | 14 +- unit_tests/command_queue/oom_image_tests.cpp | 8 +- unit_tests/command_queue/oom_tests.cpp | 6 +- .../command_stream/command_stream_fixture.h | 2 +- ...mmand_stream_receiver_flush_task_tests.cpp | 16 +- unit_tests/event/user_events_tests.cpp | 2 +- .../parent_kernel_dispatch_tests.cpp | 8 + .../submit_blocked_parent_kernel_tests.cpp | 1 + unit_tests/gen9/test_preemption_gen9.cpp | 22 +-- unit_tests/helpers/CMakeLists.txt | 1 + unit_tests/helpers/hw_parse.h | 2 +- unit_tests/helpers/kernel_commands_tests.cpp | 20 +-- unit_tests/helpers/timestamp_packet_tests.cpp | 155 ++++++++++++++++++ .../memory_manager/memory_manager_tests.cpp | 24 +++ unit_tests/mocks/mock_command_queue.h | 6 + unit_tests/test_files/igdrcl.config | 3 +- 32 files changed, 429 insertions(+), 95 deletions(-) create mode 100644 runtime/helpers/timestamp_packet.h create mode 100644 
unit_tests/helpers/timestamp_packet_tests.cpp diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 7415e018dc..6158acc5c0 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -331,7 +331,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { Context &getContext() { return *context; } Context *getContextPtr() { return context; } - LinearStream &getCS(size_t minRequiredSize = 1024u); + MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize); diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index c5be2b12ed..40e94b7a7b 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -269,6 +269,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, &blockedCommandsData, hwTimeStamps, hwPerfCounter, + nullptr, preemption, blockQueue, commandType); diff --git a/runtime/command_queue/gpgpu_walker.h b/runtime/command_queue/gpgpu_walker.h index 82dab80b0c..fd6e8a5a04 100644 --- a/runtime/command_queue/gpgpu_walker.h +++ b/runtime/command_queue/gpgpu_walker.h @@ -33,6 +33,7 @@ #include "runtime/helpers/dispatch_info.h" #include "runtime/helpers/kernel_commands.h" #include "runtime/helpers/task_information.h" +#include "runtime/helpers/timestamp_packet.h" #include "runtime/indirect_heap/indirect_heap.h" #include "runtime/kernel/kernel.h" #include "runtime/program/kernel_info.h" @@ -124,6 +125,8 @@ inline cl_uint computeDimensions(const size_t workItems[3]) { template class GpgpuWalkerHelper { public: + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + static void addAluReadModifyWriteRegister( LinearStream *pCommandStream, uint32_t aluRegister, @@ -199,10 +202,17 @@ class GpgpuWalkerHelper { KernelOperation **blockedCommandsData, HwTimeStamps *hwTimeStamps, OCLRT::HwPerfCounter 
*hwPerfCounter, + TimestampPacket *timestampPacket, PreemptionMode preemptionMode, bool blockQueue, uint32_t commandType = 0); + static void setupTimestampPacket( + LinearStream *cmdStream, + WALKER_HANDLE walkerHandle, + TimestampPacket *timestampPacket, + TimestampPacket::WriteOperationType writeOperationType); + static void dispatchScheduler( CommandQueue &commandQueue, DeviceQueueHw &devQueueHw, @@ -214,6 +224,7 @@ class GpgpuWalkerHelper { template struct EnqueueOperation { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; static size_t getTotalSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo); static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel); @@ -239,6 +250,9 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext()); expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, &scheduler); } + if (DebugManager.flags.EnableTimestampPacket.get()) { + expectedSizeCS += 2 * sizeof(typename GfxFamily::PIPE_CONTROL); + } return commandQueue.getCS(expectedSizeCS); } diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl index 6262b99f07..ceddcd47bd 100644 --- a/runtime/command_queue/gpgpu_walker.inl +++ b/runtime/command_queue/gpgpu_walker.inl @@ -104,7 +104,6 @@ void GpgpuWalkerHelper::addAluReadModifyWriteRegister( pCmd4->setDestinationRegisterAddress(aluRegister); // Add PIPE_CONTROL to flush caches - typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd5 = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); *pCmd5 = PIPE_CONTROL::sInit(); 
pCmd5->setCommandStreamerStallEnable(true); @@ -157,7 +156,6 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsStart( HwTimeStamps &hwTimeStamps, OCLRT::LinearStream *commandStream) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; // PIPE_CONTROL for global timestamp uint64_t TimeStampAddress = reinterpret_cast(&(hwTimeStamps.GlobalStartTS)); @@ -185,7 +183,6 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsEnd( OCLRT::LinearStream *commandStream) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; // PIPE_CONTROL for global timestamp auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL)); @@ -340,7 +337,6 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( OCLRT::HwPerfCounter &hwPerfCounter, OCLRT::LinearStream *commandStream) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT; @@ -387,7 +383,6 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( OCLRT::HwPerfCounter &hwPerfCounter, OCLRT::LinearStream *commandStream) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT; @@ -440,6 +435,7 @@ void GpgpuWalkerHelper::dispatchWalker( KernelOperation **blockedCommandsData, HwTimeStamps *hwTimeStamps, OCLRT::HwPerfCounter *hwPerfCounter, + TimestampPacket *timestampPacket, PreemptionMode preemptionMode, bool blockQueue, uint32_t commandType) { @@ -519,6 +515,7 @@ void GpgpuWalkerHelper::dispatchWalker( DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); + size_t currentDispatchIndex = 0; for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = 
*dispatchInfo.getKernel(); @@ -613,11 +610,20 @@ void GpgpuWalkerHelper::dispatchWalker( // Implement enabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, true); + bool setupTimestampPacket = (DebugManager.flags.EnableTimestampPacket.get()) && (currentDispatchIndex == numDispatches - 1); + if (setupTimestampPacket) { + GpgpuWalkerHelper::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::Start); + } + // Program the walker. Invokes execution so all state should already be programmed typedef typename GfxFamily::GPGPU_WALKER GPGPU_WALKER; auto pGpGpuWalkerCmd = (GPGPU_WALKER *)commandStream->getSpace(sizeof(GPGPU_WALKER)); *pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker; + if (setupTimestampPacket) { + GpgpuWalkerHelper::setupTimestampPacket(commandStream, pGpGpuWalkerCmd, timestampPacket, TimestampPacket::WriteOperationType::End); + } + size_t globalOffsets[3] = {offset.x, offset.y, offset.z}; size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z}; size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z}; @@ -645,6 +651,7 @@ void GpgpuWalkerHelper::dispatchWalker( GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, false); PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); + currentDispatchIndex++; } // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled @@ -656,6 +663,24 @@ void GpgpuWalkerHelper::dispatchWalker( } } +template +void GpgpuWalkerHelper::setupTimestampPacket( + LinearStream *cmdStream, + WALKER_HANDLE walkerHandle, + TimestampPacket *timestampPacket, + TimestampPacket::WriteOperationType writeOperationType) { + + uint64_t address = timestampPacket->pickAddressForPipeControlWrite(writeOperationType); + + auto pipeControlCmd = cmdStream->getSpaceForCmd(); + *pipeControlCmd = PIPE_CONTROL::sInit(); + pipeControlCmd->setCommandStreamerStallEnable(true); + 
pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); + pipeControlCmd->setAddress(static_cast(address & 0x0000FFFFFFFFULL)); + pipeControlCmd->setAddressHigh(static_cast(address >> 32)); + pipeControlCmd->setImmediateData(0); +} + template void GpgpuWalkerHelper::dispatchScheduler( CommandQueue &commandQueue, @@ -667,7 +692,6 @@ void GpgpuWalkerHelper::dispatchScheduler( using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; OCLRT::LinearStream *commandStream = nullptr; @@ -802,14 +826,14 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K template size_t EnqueueOperation::getTotalSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) { size_t size = KernelCommandsHelper::getSizeRequiredCS() + - sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper::isPipeControlWArequired() ? 2 : 1); + sizeof(PIPE_CONTROL) * (KernelCommandsHelper::isPipeControlWArequired() ? 
2 : 1); if (reserveProfilingCmdsSpace) { - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); + size += 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } if (reservePerfCounters) { //start cmds //P_C: flush CS & TimeStamp BEGIN - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL); + size += 2 * sizeof(PIPE_CONTROL); //SRM NOOPID & Frequency size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); //gp registers @@ -821,7 +845,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(bool reserveProfiling //end cmds //P_C: flush CS & TimeStamp END; - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL); + size += 2 * sizeof(PIPE_CONTROL); //OA buffer (status head, tail) size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); //report perf count @@ -858,15 +882,15 @@ size_t EnqueueOperation::getSizeRequiredCS(uint32_t cmdType, bool res template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) { size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper::getSizeRequiredCS() + - sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper::isPipeControlWArequired() ? 2 : 1); + sizeof(PIPE_CONTROL) * (KernelCommandsHelper::isPipeControlWArequired() ? 
2 : 1); size += PreemptionHelper::getPreemptionWaCsSize(commandQueue.getDevice()); if (reserveProfilingCmdsSpace) { - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); + size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } if (reservePerfCounters) { //start cmds //P_C: flush CS & TimeStamp BEGIN - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL); + size += 2 * sizeof(PIPE_CONTROL); //SRM NOOPID & Frequency size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); //gp registers @@ -878,7 +902,7 @@ size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilin //end cmds //P_C: flush CS & TimeStamp END; - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL); + size += 2 * sizeof(PIPE_CONTROL); //OA buffer (status head, tail) size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); //report perf count @@ -899,7 +923,7 @@ template size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) { size_t size = 0; if (reserveProfilingCmdsSpace) { - size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); + size += 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } return size; } diff --git a/runtime/helpers/CMakeLists.txt b/runtime/helpers/CMakeLists.txt index 9665f78d41..cff489359d 100644 --- a/runtime/helpers/CMakeLists.txt +++ b/runtime/helpers/CMakeLists.txt @@ -85,6 +85,7 @@ set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/string_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.h + ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h ${CMAKE_CURRENT_SOURCE_DIR}/task_information.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information.h ${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h diff --git a/runtime/helpers/timestamp_packet.h 
b/runtime/helpers/timestamp_packet.h new file mode 100644 index 0000000000..ff91d66ae5 --- /dev/null +++ b/runtime/helpers/timestamp_packet.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include +#include + +namespace OCLRT { +class TimestampPacket { + public: + enum class DataIndex : uint32_t { + ContextStart, + GlobalStart, + ContextEnd, + GlobalEnd, + Max + }; + + enum class WriteOperationType : uint32_t { + Start, + End + }; + + const uint32_t *pickDataPtr() const { return &(data[0]); } + + uint64_t pickAddressForPipeControlWrite(WriteOperationType operationType) const { + auto index = WriteOperationType::Start == operationType + ? 
static_cast(DataIndex::ContextStart) + : static_cast(DataIndex::ContextEnd); + + return reinterpret_cast(&data[index]); + } + + uint32_t pickDataValue(DataIndex index) const { return data[static_cast(index)]; } + + protected: + std::array(DataIndex::Max)> data = {{1, 1, 1, 1}}; +}; +} // namespace OCLRT diff --git a/runtime/memory_manager/memory_manager.cpp b/runtime/memory_manager/memory_manager.cpp index e38a1d4251..e833acf1d3 100644 --- a/runtime/memory_manager/memory_manager.cpp +++ b/runtime/memory_manager/memory_manager.cpp @@ -30,6 +30,7 @@ #include "runtime/helpers/aligned_memory.h" #include "runtime/helpers/basic_math.h" #include "runtime/helpers/options.h" +#include "runtime/helpers/timestamp_packet.h" #include "runtime/memory_manager/deferred_deleter.h" #include "runtime/utilities/stackvec.h" #include "runtime/utilities/tag_allocator.h" @@ -37,8 +38,7 @@ #include namespace OCLRT { -constexpr size_t ProfilingTagCount = 512; -constexpr size_t PerfCounterTagCount = 512; +constexpr size_t TagCount = 512; struct ReusableAllocationRequirements { size_t requiredMinimalSize; @@ -226,11 +226,16 @@ void MemoryManager::applyCommonCleanup() { if (this->paddingAllocation) { this->freeGraphicsMemory(this->paddingAllocation); } - if (profilingTimeStampAllocator) + if (profilingTimeStampAllocator) { profilingTimeStampAllocator->cleanUpResources(); - - if (perfCounterAllocator) + } + if (perfCounterAllocator) { perfCounterAllocator->cleanUpResources(); + } + + if (timestampPacketAllocator) { + timestampPacketAllocator->cleanUpResources(); + } cleanAllocationList(-1, TEMPORARY_ALLOCATION); cleanAllocationList(-1, REUSABLE_ALLOCATION); @@ -263,18 +268,25 @@ void MemoryManager::freeAllocationsList(uint32_t waitTaskCount, AllocationsList TagAllocator *MemoryManager::getEventTsAllocator() { if (profilingTimeStampAllocator.get() == nullptr) { - profilingTimeStampAllocator.reset(new TagAllocator(this, ProfilingTagCount, MemoryConstants::cacheLineSize)); + 
profilingTimeStampAllocator = std::make_unique>(this, TagCount, MemoryConstants::cacheLineSize); } return profilingTimeStampAllocator.get(); } TagAllocator *MemoryManager::getEventPerfCountAllocator() { if (perfCounterAllocator.get() == nullptr) { - perfCounterAllocator.reset(new TagAllocator(this, PerfCounterTagCount, MemoryConstants::cacheLineSize)); + perfCounterAllocator = std::make_unique>(this, TagCount, MemoryConstants::cacheLineSize); } return perfCounterAllocator.get(); } +TagAllocator *MemoryManager::getTimestampPacketAllocator() { + if (timestampPacketAllocator.get() == nullptr) { + timestampPacketAllocator = std::make_unique>(this, TagCount, MemoryConstants::cacheLineSize); + } + return timestampPacketAllocator.get(); +} + void MemoryManager::pushAllocationForResidency(GraphicsAllocation *gfxAllocation) { residencyAllocations.push_back(gfxAllocation); } diff --git a/runtime/memory_manager/memory_manager.h b/runtime/memory_manager/memory_manager.h index 57842fe5d7..9471ccb54c 100644 --- a/runtime/memory_manager/memory_manager.h +++ b/runtime/memory_manager/memory_manager.h @@ -36,6 +36,7 @@ class Device; class DeferredDeleter; class GraphicsAllocation; class CommandStreamReceiver; +class TimestampPacket; struct HwPerfCounter; struct HwTimeStamps; @@ -192,6 +193,7 @@ class MemoryManager { TagAllocator *getEventTsAllocator(); TagAllocator *getEventPerfCountAllocator(); + TagAllocator *getTimestampPacketAllocator(); std::unique_ptr obtainReusableAllocation(size_t requiredSize, bool isInternalAllocationRequired); @@ -255,6 +257,7 @@ class MemoryManager { std::recursive_mutex mtx; std::unique_ptr> profilingTimeStampAllocator; std::unique_ptr> perfCounterAllocator; + std::unique_ptr> timestampPacketAllocator; bool force32bitAllocations = false; bool virtualPaddingAvailable = false; GraphicsAllocation *paddingAllocation = nullptr; diff --git a/runtime/os_interface/DebugVariables_base.inl b/runtime/os_interface/DebugVariables_base.inl index 
09fe6a1a49..8da6643e03 100644 --- a/runtime/os_interface/DebugVariables_base.inl +++ b/runtime/os_interface/DebugVariables_base.inl @@ -109,3 +109,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0 DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)") DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id") DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger") +DECLARE_DEBUG_VARIABLE(bool, EnableTimestampPacket, false, "Write Timestamp Packet for each set of gpu walkers") diff --git a/unit_tests/command_queue/command_queue_tests.cpp b/unit_tests/command_queue/command_queue_tests.cpp index 7a595fc457..87af754953 100644 --- a/unit_tests/command_queue/command_queue_tests.cpp +++ b/unit_tests/command_queue/command_queue_tests.cpp @@ -291,7 +291,7 @@ TEST_F(CommandQueueCommandStreamTest, GetCommandStreamReturnsValidObject) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; CommandQueue commandQueue(context.get(), pDevice, props); - auto &cs = commandQueue.getCS(); + auto &cs = commandQueue.getCS(1024); EXPECT_NE(nullptr, &cs); } @@ -326,7 +326,7 @@ TEST_F(CommandQueueCommandStreamTest, getCommandStreamCanRecycle) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; CommandQueue commandQueue(context.get(), pDevice, props); - auto &commandStreamInitial = commandQueue.getCS(); + auto &commandStreamInitial = commandQueue.getCS(1024); size_t requiredSize = commandStreamInitial.getMaxAvailableSpace() + 42; const auto &commandStream = commandQueue.getCS(requiredSize); diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index 
d92266a45e..0a389ddf9d 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -151,6 +151,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, shouldntChangeCommandStreamMemor nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -198,6 +199,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, noLocalIdsShouldntCrash) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -227,6 +229,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm) nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -257,6 +260,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm) nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(dimension, *kernel.workDim); @@ -285,6 +289,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(dimension, *kernel.workDim); @@ -314,6 +319,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(dimension, *kernel.workDim); @@ -343,6 +349,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -374,6 +381,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(2u, *kernel.localWorkSizeX); @@ -404,6 +412,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(2u, *kernel.localWorkSizeX); @@ -435,6 +444,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) { nullptr, 
nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(2u, *kernel.localWorkSizeX); @@ -466,6 +476,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(2u, *kernel.localWorkSizeX); @@ -495,6 +506,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(1u, *kernel.localWorkSizeX); @@ -527,6 +539,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); EXPECT_EQ(1u, *kernel.localWorkSizeX); @@ -563,6 +576,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -613,6 +627,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) { nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -667,10 +682,11 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), blockQueue); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); EXPECT_EQ(0u, commandStream.getUsed()); EXPECT_NE(nullptr, blockedCommandsData); EXPECT_NE(nullptr, blockedCommandsData->commandStream); @@ -706,6 +722,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), blockQueue); @@ -743,6 +760,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), blockQueue); @@ -775,6 +793,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) { nullptr, nullptr, nullptr, + nullptr, 
pDevice->getPreemptionMode(), false); @@ -816,6 +835,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -900,6 +920,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -945,6 +966,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -995,6 +1017,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -1038,7 +1061,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, givenMultiDispatchWhenWhiteliste DispatchInfo di2(&kernel, 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0)); MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2})); - GpgpuWalkerHelper::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false); + GpgpuWalkerHelper::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false); hwParser.parseCommands(cmdStream, 0); diff --git a/unit_tests/command_queue/drm_requirements_tests.cpp b/unit_tests/command_queue/drm_requirements_tests.cpp index 88dea38539..ea6861ba5c 100644 --- a/unit_tests/command_queue/drm_requirements_tests.cpp +++ b/unit_tests/command_queue/drm_requirements_tests.cpp @@ -198,7 +198,7 @@ HWTEST_F(DrmRequirementsTests, csrNewCSSized) { TEST_F(DrmRequirementsTests, cqNewCS) { { - auto &cs = pCmdQ->getCS(); + auto &cs = pCmdQ->getCS(1024); auto memoryManager = pDevice->getMemoryManager(); EXPECT_NE(nullptr, cs.getCpuBase()); ASSERT_NE(nullptr, cs.getGraphicsAllocation()); @@ -208,7 +208,7 @@ TEST_F(DrmRequirementsTests, cqNewCS) { cs.replaceGraphicsAllocation(nullptr); } 
{ - auto &cs = pCmdQ->getCS(); + auto &cs = pCmdQ->getCS(1024); EXPECT_NE(nullptr, cs.getCpuBase()); EXPECT_GT(cs.getMaxAvailableSpace(), 0u); ASSERT_NE(nullptr, cs.getGraphicsAllocation()); diff --git a/unit_tests/command_queue/finish_tests.cpp b/unit_tests/command_queue/finish_tests.cpp index 8e424994d1..d22dc9475b 100644 --- a/unit_tests/command_queue/finish_tests.cpp +++ b/unit_tests/command_queue/finish_tests.cpp @@ -95,7 +95,7 @@ HWTEST_F(FinishTest, doesntAddAPipecontrolToCQCommandStream) { ASSERT_EQ(CL_SUCCESS, retVal); // Check for PIPE_CONTROL - parseCommands(pCmdQ->getCS()); + parseCommands(pCmdQ->getCS(1024)); auto itorCmd = reverse_find(cmdList.rbegin(), cmdList.rend()); EXPECT_EQ(cmdList.rend(), itorCmd); } @@ -107,4 +107,4 @@ HWTEST_F(FinishTest, givenFreshQueueWhenFinishIsCalledThenCommandStreamIsNotAllo ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQ.peekCommandStream()); -} \ No newline at end of file +} diff --git a/unit_tests/command_queue/get_size_required_buffer_tests.cpp b/unit_tests/command_queue/get_size_required_buffer_tests.cpp index 59af9b7e42..1c27a4ee91 100644 --- a/unit_tests/command_queue/get_size_required_buffer_tests.cpp +++ b/unit_tests/command_queue/get_size_required_buffer_tests.cpp @@ -78,7 +78,7 @@ struct GetSizeRequiredBufferTest : public CommandEnqueueFixture, HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueFillBuffer) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -130,7 +130,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueFillBuffer) { HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueCopyBuffer) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - auto &commandStream = pCmdQ->getCS(); + 
auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -181,7 +181,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueCopyBuffer) { HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueReadBufferNonBlocking) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -233,7 +233,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueReadBufferNonBlock HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueReadBufferBlocking) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -286,7 +286,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueReadBufferBlocking HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueWriteBufferNonBlocking) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -335,7 +335,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueWriteBufferNonBloc } HWTEST_F(GetSizeRequiredBufferTest, enqueueWriteBufferBlocking) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = 
pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -386,7 +386,7 @@ HWTEST_F(GetSizeRequiredBufferTest, enqueueWriteBufferBlocking) { HWTEST_F(GetSizeRequiredBufferTest, enqueueKernelHelloWorld) { typedef HelloWorldKernelFixture KernelFixture; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); @@ -425,7 +425,7 @@ HWTEST_F(GetSizeRequiredBufferTest, enqueueKernelHelloWorld) { HWCMDTEST_F(IGFX_GEN8_CORE, GetSizeRequiredBufferTest, enqueueKernelSimpleArg) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef SimpleArgKernelFixture KernelFixture; - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); diff --git a/unit_tests/command_queue/get_size_required_image_tests.cpp b/unit_tests/command_queue/get_size_required_image_tests.cpp index 78de4fdbfd..651c2604fa 100644 --- a/unit_tests/command_queue/get_size_required_image_tests.cpp +++ b/unit_tests/command_queue/get_size_required_image_tests.cpp @@ -69,7 +69,7 @@ struct GetSizeRequiredImageTest : public CommandEnqueueFixture, }; HWTEST_F(GetSizeRequiredImageTest, enqueueCopyImage) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -119,7 +119,7 @@ HWTEST_F(GetSizeRequiredImageTest, enqueueCopyImage) { } HWTEST_F(GetSizeRequiredImageTest, enqueueCopyReadAndWriteImage) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); 
auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -169,7 +169,7 @@ HWTEST_F(GetSizeRequiredImageTest, enqueueCopyReadAndWriteImage) { } HWTEST_F(GetSizeRequiredImageTest, enqueueReadImageNonBlocking) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -223,7 +223,7 @@ HWTEST_F(GetSizeRequiredImageTest, enqueueReadImageNonBlocking) { } HWTEST_F(GetSizeRequiredImageTest, enqueueReadImageBlocking) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -277,7 +277,7 @@ HWTEST_F(GetSizeRequiredImageTest, enqueueReadImageBlocking) { } HWTEST_F(GetSizeRequiredImageTest, enqueueWriteImageNonBlocking) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); @@ -331,7 +331,7 @@ HWTEST_F(GetSizeRequiredImageTest, enqueueWriteImageNonBlocking) { } HWTEST_F(GetSizeRequiredImageTest, enqueueWriteImageBlocking) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); diff --git a/unit_tests/command_queue/get_size_required_tests.cpp b/unit_tests/command_queue/get_size_required_tests.cpp index 
a3de7ca2ab..539d6c29f0 100644 --- a/unit_tests/command_queue/get_size_required_tests.cpp +++ b/unit_tests/command_queue/get_size_required_tests.cpp @@ -61,7 +61,7 @@ struct GetSizeRequiredTest : public CommandEnqueueFixture, }; HWTEST_F(GetSizeRequiredTest, finish) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto retVal = pCmdQ->finish(false); @@ -74,7 +74,7 @@ HWTEST_F(GetSizeRequiredTest, finish) { } HWTEST_F(GetSizeRequiredTest, enqueueMarker) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); @@ -95,7 +95,7 @@ HWTEST_F(GetSizeRequiredTest, enqueueMarker) { } HWTEST_F(GetSizeRequiredTest, enqueueBarrierDoesntConsumeAnySpace) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); diff --git a/unit_tests/command_queue/oom_buffer_tests.cpp b/unit_tests/command_queue/oom_buffer_tests.cpp index c90aebe906..65387eb47a 100644 --- a/unit_tests/command_queue/oom_buffer_tests.cpp +++ b/unit_tests/command_queue/oom_buffer_tests.cpp @@ -109,7 +109,7 @@ struct OOMCommandQueueBufferTest : public MemoryManagementFixture, HWTEST_P(OOMCommandQueueBufferTest, enqueueCopyBuffer) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -133,7 +133,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueCopyBuffer) { HWTEST_P(OOMCommandQueueBufferTest, enqueueFillBuffer) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto 
&indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -157,7 +157,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueFillBuffer) { HWTEST_P(OOMCommandQueueBufferTest, enqueueReadBuffer) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -181,7 +181,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueReadBuffer) { HWTEST_P(OOMCommandQueueBufferTest, enqueueWriteBuffer) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -205,7 +205,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueWriteBuffer) { HWTEST_P(OOMCommandQueueBufferTest, enqueueWriteBufferRect) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -230,7 +230,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueKernelHelloWorld) { typedef HelloWorldKernelFixture KernelFixture; CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -260,7 +260,7 @@ HWTEST_P(OOMCommandQueueBufferTest, enqueueKernelSimpleArg) { typedef SimpleArgKernelFixture 
KernelFixture; CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); diff --git a/unit_tests/command_queue/oom_image_tests.cpp b/unit_tests/command_queue/oom_image_tests.cpp index f73025d047..9afe995937 100644 --- a/unit_tests/command_queue/oom_image_tests.cpp +++ b/unit_tests/command_queue/oom_image_tests.cpp @@ -93,7 +93,7 @@ struct OOMCommandQueueImageTest : public DeviceFixture, HWTEST_P(OOMCommandQueueImageTest, enqueueCopyImage) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -117,7 +117,7 @@ HWTEST_P(OOMCommandQueueImageTest, enqueueCopyImage) { HWTEST_P(OOMCommandQueueImageTest, enqueueFillImage) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -141,7 +141,7 @@ HWTEST_P(OOMCommandQueueImageTest, enqueueFillImage) { HWTEST_P(OOMCommandQueueImageTest, enqueueReadImage) { CommandQueueHw cmdQ(context, pDevice, 0); - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -165,7 +165,7 @@ HWTEST_P(OOMCommandQueueImageTest, enqueueReadImage) { HWTEST_P(OOMCommandQueueImageTest, enqueueWriteImage) { CommandQueueHw cmdQ(context, pDevice, 0); - 
auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); diff --git a/unit_tests/command_queue/oom_tests.cpp b/unit_tests/command_queue/oom_tests.cpp index 83e2050ad9..227d221bdd 100644 --- a/unit_tests/command_queue/oom_tests.cpp +++ b/unit_tests/command_queue/oom_tests.cpp @@ -83,7 +83,7 @@ struct OOMCommandQueueTest : public DeviceFixture, }; HWTEST_P(OOMCommandQueueTest, finish) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -99,7 +99,7 @@ HWTEST_P(OOMCommandQueueTest, finish) { } HWTEST_P(OOMCommandQueueTest, enqueueMarker) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); @@ -122,7 +122,7 @@ HWTEST_P(OOMCommandQueueTest, enqueueMarker) { } HWTEST_P(OOMCommandQueueTest, enqueueBarrier) { - auto &commandStream = pCmdQ->getCS(); + auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); diff --git a/unit_tests/command_stream/command_stream_fixture.h b/unit_tests/command_stream/command_stream_fixture.h index 675d27094e..90aca26720 100644 --- a/unit_tests/command_stream/command_stream_fixture.h +++ b/unit_tests/command_stream/command_stream_fixture.h @@ -33,7 +33,7 @@ struct CommandStreamFixture { } void SetUp(CommandQueue *pCmdQ) { - pCS = &pCmdQ->getCS(); + pCS = &pCmdQ->getCS(1024); pCmdBuffer = pCS->getCpuBase(); } 
diff --git a/unit_tests/command_stream/command_stream_receiver_flush_task_tests.cpp b/unit_tests/command_stream/command_stream_receiver_flush_task_tests.cpp index 3d047f7e80..ebe8bf5e02 100644 --- a/unit_tests/command_stream/command_stream_receiver_flush_task_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_flush_task_tests.cpp @@ -867,7 +867,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushTaskWithOnlyPipeContr // Force a PIPE_CONTROL through a blocking flag auto blocking = true; - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->lastSentCoherencyRequest = 0; @@ -914,7 +914,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskBlockingHasPipeControlWit size_t pipeControlCount = static_cast &>(commandStreamReceiver).getRequiredPipeControlSize() / sizeof(PIPE_CONTROL); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); DispatchFlags dispatchFlags; dispatchFlags.blocking = true; @@ -973,7 +973,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlush auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0); @@ -1012,7 +1012,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFl auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, 
sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0); @@ -1040,7 +1040,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskWithTaskCSPassedAsCommand CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); auto deviceEngineType = pDevice->getEngineType(); DispatchFlags dispatchFlags; @@ -1174,7 +1174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenPowerOfTwo size_t GWS = 1024; CommandQueueHw commandQueue(&ctx, pDevice, 0); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCmd, cmdList.end()); @@ -1251,7 +1251,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapWhenFinishIsCal EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); @@ -1266,7 +1266,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenFlushedCal cl_event event = nullptr; auto &commandStreamReceiver = pDevice->getCommandStreamReceiver(); - auto &commandStreamTask = commandQueue.getCS(); + auto &commandStreamTask = commandQueue.getCS(1024); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; diff --git a/unit_tests/event/user_events_tests.cpp b/unit_tests/event/user_events_tests.cpp index 76c480efb2..a5cdaa2b06 100644 --- a/unit_tests/event/user_events_tests.cpp +++ b/unit_tests/event/user_events_tests.cpp @@ -741,7 +741,7 @@ TEST_F(EventTests, userEventsDoesntChangeCommandStreamWhileEnqueueButDoesAfterSi int 
sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; - auto &cs = pCmdQ->getCS(); + auto &cs = pCmdQ->getCS(1024); auto used = cs.getSpace(0); //call NDR diff --git a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp index 973c41ee28..e3a1ea1146 100644 --- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp +++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp @@ -64,6 +64,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDev &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -119,6 +120,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDef &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -144,6 +146,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSH &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -179,6 +182,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), true); ASSERT_NE(nullptr, blockedCommandsData); @@ -276,6 +280,7 @@ HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatched &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), true); @@ -309,6 +314,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -365,6 +371,7 @@ HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedT &blockedCommandsData, nullptr, nullptr, + nullptr, pDevice->getPreemptionMode(), false); @@ -400,6 +407,7 @@ HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatch &blockedCommandsData, nullptr, nullptr, + 
nullptr, pDevice->getPreemptionMode(), false); diff --git a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp index b05ad35b66..11cbd5291e 100644 --- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp +++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp @@ -449,6 +449,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand &blockedCommandsData, nullptr, nullptr, + nullptr, device->getPreemptionMode(), true); diff --git a/unit_tests/gen9/test_preemption_gen9.cpp b/unit_tests/gen9/test_preemption_gen9.cpp index 9782810bce..c93e2de85c 100644 --- a/unit_tests/gen9/test_preemption_gen9.cpp +++ b/unit_tests/gen9/test_preemption_gen9.cpp @@ -124,13 +124,13 @@ GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithThe HardwareParse hwParserCmdQ; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParserCsr.parseCommands(csr.commandStream); - hwParserCmdQ.parseCommands(pCmdQ->getCS()); + hwParserCmdQ.parseCommands(pCmdQ->getCS(1024)); auto offsetCsr = csr.commandStream.getUsed(); - auto offsetCmdQ = pCmdQ->getCS().getUsed(); + auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); hwParserCsr.parseCommands(csr.commandStream, offsetCsr); - hwParserCmdQ.parseCommands(pCmdQ->getCS(), offsetCmdQ); + hwParserCmdQ.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); EXPECT_EQ(1U, countMmio(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end(), 0x2580u)); EXPECT_EQ(0U, countMmio(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end(), 0x2600u)); @@ -159,10 +159,10 @@ GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithThe pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); 
hwCsrParser.findHardwareCommands(); - hwCmdQParser.parseCommands(pCmdQ->getCS()); + hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); - auto offsetCmdQ = pCmdQ->getCS().getUsed(); + auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOne = false; for (auto it : hwCsrParser.lriList) { @@ -201,7 +201,7 @@ GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithThe hwCsrParser.parseCommands(csr.commandStream, offsetCsr); hwCsrParser.findHardwareCommands(); - hwCmdQParser.parseCommands(pCmdQ->getCS(), offsetCmdQ); + hwCmdQParser.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); hwCmdQParser.findHardwareCommands(); for (auto it : hwCsrParser.lriList) { @@ -296,10 +296,10 @@ GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSa pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); hwCsrParser.findHardwareCommands(); - hwCmdQParser.parseCommands(pCmdQ->getCS()); + hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); - auto offsetCmdQ = pCmdQ->getCS().getUsed(); + auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOneLri = false; for (auto it : hwCsrParser.lriList) { @@ -376,10 +376,10 @@ GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSa pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); hwCsrParser.findHardwareCommands(); - hwCmdQParser.parseCommands(pCmdQ->getCS()); + hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); - auto offsetCmdQ = pCmdQ->getCS().getUsed(); + auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOneLri = false; for (auto it : hwCsrParser.lriList) { @@ -425,7 
+425,7 @@ GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSa pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream, offsetCsr); hwCsrParser.findHardwareCommands(); - hwCmdQParser.parseCommands(pCmdQ->getCS(), offsetCmdQ); + hwCmdQParser.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); hwCmdQParser.findHardwareCommands(); for (auto it : hwCsrParser.lriList) { diff --git a/unit_tests/helpers/CMakeLists.txt b/unit_tests/helpers/CMakeLists.txt index bbc74ab5e7..bd926bc80d 100644 --- a/unit_tests/helpers/CMakeLists.txt +++ b/unit_tests/helpers/CMakeLists.txt @@ -54,6 +54,7 @@ set(IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/string_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/TestDebugVariables.inl + ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.inl diff --git a/unit_tests/helpers/hw_parse.h b/unit_tests/helpers/hw_parse.h index 53741a116a..4d689c438b 100644 --- a/unit_tests/helpers/hw_parse.h +++ b/unit_tests/helpers/hw_parse.h @@ -97,7 +97,7 @@ struct HardwareParse { parseCommands(commandStreamCSR, startCSRCS); startCSRCS = commandStreamCSR.getUsed(); - auto &commandStream = commandQueue.getCS(); + auto &commandStream = commandQueue.getCS(1024); if (previousCS != &commandStream) { startCS = 0; } diff --git a/unit_tests/helpers/kernel_commands_tests.cpp b/unit_tests/helpers/kernel_commands_tests.cpp index 45bfc215d3..a20b82ac3a 100644 --- a/unit_tests/helpers/kernel_commands_tests.cpp +++ b/unit_tests/helpers/kernel_commands_tests.cpp @@ -116,7 +116,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, programMediaInterfaceDescriptorL typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename 
FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); KernelCommandsHelper::sendMediaInterfaceDescriptorLoad(commandStream, @@ -133,7 +133,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, programMediaStateFlushResourceUs typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); KernelCommandsHelper::sendMediaStateFlush(commandStream, @@ -303,7 +303,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, sendIndirectStateResourceUsage) const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); @@ -355,7 +355,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; CommandQueueHw cmdQ(pContext, pDevice, 0); - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); MockKernelWithInternals mockKernel(*pDevice, pContext); @@ -389,7 +389,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; CommandQueueHw cmdQ(pContext, pDevice, 0); - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); MockKernelWithInternals mockKernel(*pDevice, pContext); @@ -425,7 +425,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn using 
INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; CommandQueueHw cmdQ(pContext, pDevice, 0); - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); MockKernelWithInternals mockKernel(*pDevice, pContext); @@ -481,7 +481,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, whenSendingIndirectStateThenKern const size_t localWorkSizeZ = 4; const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ}; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); @@ -551,7 +551,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, usedBindingTableStatePointer) { const size_t localWorkSizes[3]{256, 1, 1}; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); @@ -707,7 +707,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, usedBindingTableStatePointersFor for (uint32_t ssbaOffset : {0U, (uint32_t)sizeof(typename FamilyType::RENDER_SURFACE_STATE)}) { CommandQueueHw cmdQ(nullptr, pDevice, 0); - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); @@ -917,7 +917,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir MockKernelWithInternals kernelInternals(*pDevice); const size_t localWorkSizes[3]{1, 1, 1}; - auto &commandStream = cmdQ.getCS(); + auto &commandStream = cmdQ.getCS(1024); auto 
&dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp new file mode 100644 index 0000000000..2e8af34de6 --- /dev/null +++ b/unit_tests/helpers/timestamp_packet_tests.cpp @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "runtime/command_queue/gpgpu_walker.h" +#include "runtime/helpers/options.h" +#include "runtime/helpers/timestamp_packet.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" +#include "unit_tests/helpers/hw_parse.h" +#include "unit_tests/mocks/mock_context.h" +#include "unit_tests/mocks/mock_device.h" +#include "unit_tests/mocks/mock_command_queue.h" +#include "unit_tests/mocks/mock_kernel.h" +#include "unit_tests/mocks/mock_mdi.h" + +#include "test.h" + +using namespace OCLRT; + +class MockTimestampPacket : public TimestampPacket { + public: + using TimestampPacket::data; +}; + +using TimestampPacketTests = ::testing::Test; + +TEST_F(TimestampPacketTests, whenObjectIsCreatedThenInitializeAllStamps) { + MockTimestampPacket timestampPacket; + auto maxElements = static_cast(TimestampPacket::DataIndex::Max); + EXPECT_EQ(4u, maxElements); + + EXPECT_EQ(maxElements, timestampPacket.data.size()); + + for (uint32_t i = 0; i < maxElements; i++) { + EXPECT_EQ(1u, timestampPacket.pickDataValue(static_cast(i))); + EXPECT_EQ(1u, timestampPacket.data[i]); + } +} + +TEST_F(TimestampPacketTests, whenAskedForStampAddressThenReturnWithValidOffset) { + MockTimestampPacket timestampPacket; + + EXPECT_EQ(&timestampPacket.data[0], timestampPacket.pickDataPtr()); + + auto startAddress = timestampPacket.pickAddressForPipeControlWrite(TimestampPacket::WriteOperationType::Start); + auto expectedStartAddress = &timestampPacket.data[static_cast(TimestampPacket::DataIndex::ContextStart)]; + EXPECT_EQ(expectedStartAddress, &timestampPacket.data[0]); + EXPECT_EQ(reinterpret_cast(expectedStartAddress), startAddress); + + auto endAddress = timestampPacket.pickAddressForPipeControlWrite(TimestampPacket::WriteOperationType::End); + auto expectedEndAddress = &timestampPacket.data[static_cast(TimestampPacket::DataIndex::ContextEnd)]; + EXPECT_EQ(expectedEndAddress, &timestampPacket.data[2]); + EXPECT_EQ(reinterpret_cast(expectedEndAddress), endAddress); +} + +HWTEST_F(TimestampPacketTests, 
givenDebugVariableEnabledWhenEstimatingStreamSizeThenAddTwoPipeControls) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableTimestampPacket.set(false); + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockCommandQueue cmdQ(nullptr, device.get(), nullptr); + MockKernelWithInternals kernel1(*device); + MockKernelWithInternals kernel2(*device); + MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel1.mockKernel, kernel2.mockKernel})); + + getCommandStream(cmdQ, false, false, multiDispatchInfo); + auto sizeWithDisabled = cmdQ.requestedCmdStreamSize; + + DebugManager.flags.EnableTimestampPacket.set(true); + getCommandStream(cmdQ, false, false, multiDispatchInfo); + auto sizeWithEnabled = cmdQ.requestedCmdStreamSize; + + EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + 2 * sizeof(typename FamilyType::PIPE_CONTROL)); +} + +HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenEnabledDebugVariableWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) { + using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + MockTimestampPacket timestampPacket; + DebugManagerStateRestore restore; + DebugManager.flags.EnableTimestampPacket.set(true); + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockKernelWithInternals kernel1(*device); + MockKernelWithInternals kernel2(*device); + + MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel1.mockKernel, kernel2.mockKernel})); + + MockCommandQueue cmdQ(nullptr, device.get(), nullptr); + auto &cmdStream = cmdQ.getCS(0); + + GpgpuWalkerHelper::dispatchWalker( + cmdQ, + multiDispatchInfo, + 0, + nullptr, + nullptr, + nullptr, + nullptr, + &timestampPacket, + device->getPreemptionMode(), + false); + + HardwareParse hwParser; + hwParser.parseCommands(cmdStream, 0); + + auto verifyPipeControl = [](PIPE_CONTROL *pipeControl, uint64_t expectedAddress) { + 
EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); + EXPECT_EQ(0u, pipeControl->getImmediateData()); + EXPECT_EQ(static_cast(expectedAddress & 0x0000FFFFFFFFULL), pipeControl->getAddress()); + EXPECT_EQ(static_cast(expectedAddress >> 32), pipeControl->getAddressHigh()); + }; + + uint32_t walkersFound = 0; + for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { + if (genCmdCast(*it)) { + walkersFound++; + if (walkersFound == 1) { + EXPECT_EQ(nullptr, genCmdCast(*--it)); + it++; + EXPECT_EQ(nullptr, genCmdCast(*++it)); + it--; + } else if (walkersFound == 2) { + auto pipeControl = genCmdCast(*--it); + EXPECT_NE(nullptr, pipeControl); + verifyPipeControl(pipeControl, timestampPacket.pickAddressForPipeControlWrite(TimestampPacket::WriteOperationType::Start)); + it++; + pipeControl = genCmdCast(*++it); + EXPECT_NE(nullptr, pipeControl); + verifyPipeControl(pipeControl, timestampPacket.pickAddressForPipeControlWrite(TimestampPacket::WriteOperationType::End)); + it--; + } + } + } + EXPECT_EQ(2u, walkersFound); +} diff --git a/unit_tests/memory_manager/memory_manager_tests.cpp b/unit_tests/memory_manager/memory_manager_tests.cpp index bfd21e1e9f..7a264f15a4 100644 --- a/unit_tests/memory_manager/memory_manager_tests.cpp +++ b/unit_tests/memory_manager/memory_manager_tests.cpp @@ -22,6 +22,7 @@ #include "runtime/event/event.h" #include "runtime/helpers/dispatch_info.h" +#include "runtime/helpers/timestamp_packet.h" #include "runtime/mem_obj/image.h" #include "runtime/os_interface/os_interface.h" #include "runtime/program/printf_handler.h" @@ -578,6 +579,29 @@ TEST_F(MemoryAllocatorTest, getEventPerfCountAllocator) { EXPECT_EQ(allocator2, allocator); } +TEST_F(MemoryAllocatorTest, givenTimestampPacketAllocatorWhenAskingForTagThenReturnValidObject) { + class MyMockMemoryManager : public OsAgnosticMemoryManager { + public: + using 
OsAgnosticMemoryManager::timestampPacketAllocator; + MyMockMemoryManager() : OsAgnosticMemoryManager(false){}; + } myMockMemoryManager; + + EXPECT_EQ(nullptr, myMockMemoryManager.timestampPacketAllocator.get()); + + TagAllocator *allocator = myMockMemoryManager.getTimestampPacketAllocator(); + EXPECT_NE(nullptr, myMockMemoryManager.timestampPacketAllocator.get()); + EXPECT_EQ(allocator, myMockMemoryManager.timestampPacketAllocator.get()); + + TagAllocator *allocator2 = myMockMemoryManager.getTimestampPacketAllocator(); + EXPECT_EQ(allocator, allocator2); + + auto node1 = allocator->getTag(); + auto node2 = allocator->getTag(); + EXPECT_NE(nullptr, node1); + EXPECT_NE(nullptr, node2); + EXPECT_NE(node1, node2); +} + TEST_F(MemoryAllocatorTest, givenMemoryManagerWhensetForce32BitAllocationsIsCalledWithTrueMutlipleTimesThenAllocatorIsReused) { memoryManager->setForce32BitAllocations(true); EXPECT_NE(nullptr, memoryManager->allocator32Bit.get()); diff --git a/unit_tests/mocks/mock_command_queue.h b/unit_tests/mocks/mock_command_queue.h index cb7db90d85..1004ca6e9a 100644 --- a/unit_tests/mocks/mock_command_queue.h +++ b/unit_tests/mocks/mock_command_queue.h @@ -40,6 +40,11 @@ class MockCommandQueue : public CommandQueue { : CommandQueue(context, device, props) { } + LinearStream &getCS(size_t minRequiredSize) override { + requestedCmdStreamSize = minRequiredSize; + return CommandQueue::getCS(minRequiredSize); + } + void releaseIndirectHeap(IndirectHeap::Type heap) override { releaseIndirectHeapCalled = true; CommandQueue::releaseIndirectHeap(heap); @@ -63,6 +68,7 @@ class MockCommandQueue : public CommandQueue { size_t writeBufferOffset = 0; size_t writeBufferSize = 0; void *writeBufferPtr = nullptr; + size_t requestedCmdStreamSize = 0; }; template diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index 491b079cdb..aa7a68945c 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -79,4 +79,5 @@ 
AUBDumpFilterKernelEndIdx = -1 RebuildPrecompiledKernels = false CreateMultipleDevices = 0 EnableExperimentalCommandBuffer = 0 -LoopAtPlatformInitialize = false \ No newline at end of file +LoopAtPlatformInitialize = false +EnableTimestampPacket = false \ No newline at end of file