mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Handle TimestampPackets for non-kernel enqueues
Change-Id: I52ec4f43b10bf6e2a10b2455d32a90a606645d29 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
8ceba24296
commit
66427f60c6
@ -586,4 +586,9 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp
|
||||
timestampPacketContainer->add(allocator->getTag());
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandQueue::allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest) {
|
||||
return this->timestampPacketContainer &&
|
||||
((CL_COMMAND_MARKER == commandType && eventsRequest.outEvent && eventsRequest.numEventsInWaitList == 0) || (CL_COMMAND_BARRIER == commandType));
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/properties_helper.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/os_interface/performance_counters.h"
|
||||
#include <atomic>
|
||||
@ -25,7 +26,6 @@ class Image;
|
||||
class IndirectHeap;
|
||||
class Kernel;
|
||||
class MemObj;
|
||||
class TimestampPacketContainer;
|
||||
struct CompletionStamp;
|
||||
|
||||
enum class QueuePriority {
|
||||
@ -417,6 +417,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes);
|
||||
bool allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest);
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
@ -197,6 +197,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
TimestampPacketContainer previousTimestampPacketNodes(device->getMemoryManager());
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
bool emitPipeControlWithTimestampWrite = allowTimestampPacketPipeControlWrite(commandType, eventsRequest);
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
HwPerfCounter *hwPerfCounter = nullptr;
|
||||
@ -219,7 +221,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
if (timestampPacketContainer.get()) {
|
||||
eventBuilder.getEvent()->setTimestampPacketNodes(*timestampPacketContainer);
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
if (this->isProfilingEnabled()) {
|
||||
// Get allocation for timestamps
|
||||
@ -268,9 +270,21 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
commandStreamReceiver.setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||
|
||||
slmUsed = multiDispatchInfo.usesSlm();
|
||||
} else if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
if (emitPipeControlWithTimestampWrite) {
|
||||
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes);
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
CompletionStamp completionStamp;
|
||||
if (!blockQueue) {
|
||||
if (parentKernel) {
|
||||
@ -316,7 +330,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
|
||||
auto submissionRequired = isCommandWithoutKernel(commandType) ? false : true;
|
||||
auto submissionRequired = !isCommandWithoutKernel(commandType) || emitPipeControlWithTimestampWrite;
|
||||
|
||||
if (submissionRequired) {
|
||||
completionStamp = enqueueNonBlocked<commandType>(
|
||||
@ -488,7 +502,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
bool slmUsed,
|
||||
PrintfHandler *printfHandler) {
|
||||
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty());
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty() && !timestampPacketContainer);
|
||||
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
auto implicitFlush = false;
|
||||
@ -553,7 +567,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
}
|
||||
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
|
||||
if (multiDispatchInfo.peekMainKernel()) {
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
@ -570,6 +586,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
if (multiDispatchInfo.empty()) {
|
||||
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketContainer->peekNodes().at(0);
|
||||
}
|
||||
}
|
||||
dispatchFlags.numGrfRequired = numGrfRequired;
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
@ -639,8 +658,13 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
isPerfCountersEnabled(),
|
||||
*this,
|
||||
nullptr));
|
||||
auto cmd = std::unique_ptr<Command>(new CommandMarker(
|
||||
*this, commandStreamReceiver, commandType, cmdSize));
|
||||
|
||||
auto cmd = std::make_unique<CommandMarker>(*this, commandStreamReceiver, commandType, cmdSize);
|
||||
|
||||
if (allowTimestampPacketPipeControlWrite(commandType, eventsRequest)) {
|
||||
cmd->setTimestampPacketsForPipeControlWrite(*timestampPacketContainer);
|
||||
}
|
||||
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
} else {
|
||||
//store task data in event
|
||||
|
@ -183,13 +183,8 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(
|
||||
|
||||
if (TimestampPacket::WriteOperationType::AfterWalker == writeOperationType) {
|
||||
uint64_t address = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
|
||||
auto pipeControlCmd = cmdStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
|
||||
pipeControlCmd->setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
|
||||
pipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
|
||||
pipeControlCmd->setImmediateData(0);
|
||||
|
||||
KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(*cmdStream, address, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,11 +10,10 @@
|
||||
#include "runtime/command_stream/experimental_command_buffer.h"
|
||||
#include "runtime/command_stream/preemption.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include "runtime/helpers/cache_policy.h"
|
||||
#include "runtime/helpers/flush_stamp.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/os_interface/os_interface.h"
|
||||
|
||||
|
@ -254,6 +254,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (dispatchFlags.outOfDeviceDependencies) {
|
||||
handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, device);
|
||||
}
|
||||
if (dispatchFlags.timestampPacketForPipeControlWrite) {
|
||||
uint64_t address = dispatchFlags.timestampPacketForPipeControlWrite->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
|
||||
KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(commandStreamCSR, address, 0);
|
||||
makeResident(*dispatchFlags.timestampPacketForPipeControlWrite->getGraphicsAllocation());
|
||||
}
|
||||
initPageTableManagerRegisters(commandStreamCSR);
|
||||
programPreemption(commandStreamCSR, device, dispatchFlags);
|
||||
programComputeMode(commandStreamCSR, dispatchFlags);
|
||||
|
@ -14,6 +14,9 @@
|
||||
|
||||
namespace OCLRT {
|
||||
struct FlushStampTrackingObj;
|
||||
class TimestampPacket;
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
|
||||
namespace CSRequirements {
|
||||
//cleanup section usually contains 1-2 pipeControls BB end and place for BB start
|
||||
@ -41,6 +44,7 @@ struct DispatchFlags {
|
||||
QueueThrottle throttle = QueueThrottle::MEDIUM;
|
||||
bool implicitFlush = false;
|
||||
bool outOfOrderExecutionAllowed = false;
|
||||
TagNode<TimestampPacket> *timestampPacketForPipeControlWrite = nullptr;
|
||||
FlushStampTrackingObj *flushStampReference = nullptr;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
EventsRequest *outOfDeviceDependencies = nullptr;
|
||||
|
@ -7,12 +7,14 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/device/device_vector.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/context/driver_diagnostics.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include <vector>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class CommandQueue;
|
||||
class Device;
|
||||
class DeviceQueue;
|
||||
class MemoryManager;
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/event/async_events_handler.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/os_interface/os_thread.h"
|
||||
#include <iterator>
|
||||
|
||||
|
@ -713,7 +713,7 @@ void Event::copyPerfCounters(InstrPmRegsCfg *config) {
|
||||
memcpy_s(perfConfigurationData, sizeof(InstrPmRegsCfg), config, sizeof(InstrPmRegsCfg));
|
||||
}
|
||||
|
||||
void Event::setTimestampPacketNodes(TimestampPacketContainer &inputTimestampPacketContainer) {
|
||||
// Hands the given container to this event's own timestamp packet container
// through assignAndIncrementNodesRefCounts().
void Event::addTimestampPacketNodes(TimestampPacketContainer &inputTimestampPacketContainer) {
    auto &eventNodes = *timestampPacketContainer;
    eventNodes.assignAndIncrementNodesRefCounts(inputTimestampPacketContainer);
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
HwTimeStamps *getHwTimeStamp();
|
||||
GraphicsAllocation *getHwTimeStampAllocation();
|
||||
|
||||
void setTimestampPacketNodes(TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
void addTimestampPacketNodes(TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
TimestampPacketContainer *getTimestampPacketNodes() const;
|
||||
|
||||
bool isPerfCountersEnabled() {
|
||||
|
@ -1,23 +1,8 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/api/cl_types.h"
|
||||
@ -25,6 +10,7 @@
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
|
||||
namespace OCLRT {
|
||||
EventBuilder::~EventBuilder() {
|
||||
|
@ -155,6 +155,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
|
||||
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
|
||||
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
|
||||
static void programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data);
|
||||
|
||||
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
|
||||
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);
|
||||
|
@ -453,6 +453,19 @@ typename GfxFamily::MI_ATOMIC *KernelCommandsHelper<GfxFamily>::programMiAtomic(
|
||||
return miAtomic;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
auto pipeControlCmd = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
|
||||
pipeControlCmd->setAddress(static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL));
|
||||
pipeControlCmd->setAddressHigh(static_cast<uint32_t>(writeAddress >> 32));
|
||||
pipeControlCmd->setImmediateData(data);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool KernelCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
|
||||
return true;
|
||||
|
@ -252,6 +252,9 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
|
||||
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
|
||||
dispatchFlags.throttle = cmdQ.getThrottle();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
|
||||
if (timestampPacketsForPipeControlWrite) {
|
||||
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketsForPipeControlWrite->peekNodes().at(0);
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
@ -270,4 +273,9 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
// Replaces this marker's container with a fresh TimestampPacketContainer (built
// with the queue device's memory manager) and fills it from inputNodes via
// assignAndIncrementNodesRefCounts().
void CommandMarker::setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes) {
    timestampPacketsForPipeControlWrite = std::make_unique<TimestampPacketContainer>(cmdQ.getDevice().getMemoryManager());

    auto &ownedNodes = *timestampPacketsForPipeControlWrite;
    ownedNodes.assignAndIncrementNodesRefCounts(inputNodes);
}
|
||||
} // namespace OCLRT
|
||||
|
@ -120,9 +120,11 @@ class CommandMarker : public Command {
|
||||
: cmdQ(cmdQ), csr(csr), clCommandType(clCommandType), commandSize(commandSize) {
|
||||
}
|
||||
|
||||
void setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes);
|
||||
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<TimestampPacketContainer> timestampPacketsForPipeControlWrite;
|
||||
CommandQueue &cmdQ;
|
||||
CommandStreamReceiver &csr;
|
||||
uint32_t clCommandType;
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/command_stream/linear_stream.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
@ -44,9 +45,11 @@ void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
|
||||
std::swap(timestampPacketNodes, pendingNodes);
|
||||
}
|
||||
|
||||
void TimestampPacketContainer::assignAndIncrementNodesRefCounts(TimestampPacketContainer &timestampPacketContainer) {
|
||||
timestampPacketNodes = timestampPacketContainer.timestampPacketNodes;
|
||||
for (auto &node : timestampPacketNodes) {
|
||||
// Appends every node of inputTimestampPacketContainer to this container's node
// list (nodes already held are kept) and calls incRefCount() on each appended node.
void TimestampPacketContainer::assignAndIncrementNodesRefCounts(TimestampPacketContainer &inputTimestampPacketContainer) {
    const auto &sourceNodes = inputTimestampPacketContainer.peekNodes();

    timestampPacketNodes.insert(timestampPacketNodes.end(), sourceNodes.begin(), sourceNodes.end());

    for (auto *sourceNode : sourceNodes) {
        sourceNode->incRefCount();
    }
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/properties_helper.h"
|
||||
|
||||
#include <cstdint>
|
||||
@ -15,6 +16,8 @@
|
||||
#include <vector>
|
||||
|
||||
namespace OCLRT {
|
||||
class CommandStreamReceiver;
|
||||
class LinearStream;
|
||||
class MemoryManager;
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
@ -90,7 +93,7 @@ class TimestampPacketContainer : public NonCopyableOrMovableClass {
|
||||
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
|
||||
void add(Node *timestampPacketNode);
|
||||
void swapNodes(TimestampPacketContainer &timestampPacketContainer);
|
||||
void assignAndIncrementNodesRefCounts(TimestampPacketContainer &timestampPacketContainer);
|
||||
void assignAndIncrementNodesRefCounts(TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
void resolveDependencies(bool clearAllDependencies);
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/utilities/debug_settings_reader.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/os_interface/os_interface.h"
|
||||
#include "runtime/os_interface/windows/os_context_win.h"
|
||||
#include "runtime/os_interface/windows/os_interface.h"
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/os_interface/windows/windows_wrapper.h"
|
||||
#include "runtime/sharings/gl/gl_arb_sync_event.h"
|
||||
#include "runtime/sharings/gl/gl_sharing.h"
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/sharings/sharing_factory.h"
|
||||
#include "runtime/sharings/gl/gl_sharing.h"
|
||||
#include "runtime/sharings/gl/gl_arb_sync_event.h"
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
|
||||
#include "public/cl_gl_private_intel.h"
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "runtime/sharings/gl/gl_sharing.h"
|
||||
#include "runtime/sharings/sharing_factory.h"
|
||||
#include "runtime/sharings/sharing_factory.inl"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "runtime/event/async_events_handler.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/mocks/mock_async_event_handler.h"
|
||||
|
@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "runtime/execution_environment/execution_environment.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
|
@ -1110,6 +1110,26 @@ HWTEST_F(KernelCommandsHelperTests, whenProgrammingMiAtomicThenSetupAllFields) {
|
||||
EXPECT_EQ(0, memcmp(&referenceCommand, miAtomic, sizeof(MI_ATOMIC)));
|
||||
}
|
||||
|
||||
// Checks that programPipeControlDataWriteWithCsStall() consumes exactly one
// PIPE_CONTROL worth of stream space and that the bytes written match a
// reference command programmed by hand with the same fields.
HWTEST_F(KernelCommandsHelperTests, givenWriteAddressAndDataWhenProgrammingPipeControlThenSetupAllFields) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    const uint64_t writeAddress = 0x10000;
    const uint64_t writeData = 1234;

    uint8_t buffer[1024] = {};
    LinearStream cmdStream(buffer, sizeof(buffer));

    KernelCommandsHelper<FamilyType>::programPipeControlDataWriteWithCsStall(cmdStream, writeAddress, writeData);

    // Expected command: CS stall + immediate-data post-sync write to writeAddress.
    PIPE_CONTROL expectedCmd = PIPE_CONTROL::sInit();
    expectedCmd.setCommandStreamerStallEnable(true);
    expectedCmd.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
    expectedCmd.setAddress(static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL));
    expectedCmd.setAddressHigh(static_cast<uint32_t>(writeAddress >> 32));
    expectedCmd.setImmediateData(writeData);

    EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed());
    EXPECT_EQ(0, memcmp(&expectedCmd, buffer, sizeof(PIPE_CONTROL)));
}
|
||||
|
||||
typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest;
|
||||
|
||||
HWTEST_P(ParentKernelCommandsFromBinaryTest, getSizeRequiredForExecutionModelForSurfaceStatesReturnsSizeOfBlocksPlusMaxBindingTableSizeForAllIDTEntriesAndSchedulerSSHSize) {
|
||||
|
@ -416,13 +416,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
|
||||
UserEvent event2;
|
||||
event2.setStatus(CL_COMPLETE);
|
||||
Event event3(cmdQ1.get(), 0, 0, 0);
|
||||
event3.setTimestampPacketNodes(timestamp3);
|
||||
event3.addTimestampPacketNodes(timestamp3);
|
||||
Event event4(cmdQ2.get(), 0, 0, 0);
|
||||
event4.setTimestampPacketNodes(timestamp4);
|
||||
event4.addTimestampPacketNodes(timestamp4);
|
||||
Event event5(cmdQ1.get(), 0, 0, 0);
|
||||
event5.setTimestampPacketNodes(timestamp5);
|
||||
event5.addTimestampPacketNodes(timestamp5);
|
||||
Event event6(cmdQ2.get(), 0, 0, 0);
|
||||
event6.setTimestampPacketNodes(timestamp6);
|
||||
event6.addTimestampPacketNodes(timestamp6);
|
||||
|
||||
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
|
||||
|
||||
@ -464,9 +464,9 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
|
||||
|
||||
UserEvent userEvent;
|
||||
Event event0(cmdQ1.get(), 0, 0, 0);
|
||||
event0.setTimestampPacketNodes(timestamp0);
|
||||
event0.addTimestampPacketNodes(timestamp0);
|
||||
Event event1(cmdQ2.get(), 0, 0, 0);
|
||||
event1.setTimestampPacketNodes(timestamp1);
|
||||
event1.addTimestampPacketNodes(timestamp1);
|
||||
|
||||
cl_event waitlist[] = {&userEvent, &event0, &event1};
|
||||
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
|
||||
@ -511,13 +511,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
|
||||
UserEvent event1;
|
||||
UserEvent event2;
|
||||
Event event3(mockCmdQ.get(), 0, 0, 0);
|
||||
event3.setTimestampPacketNodes(timestamp3);
|
||||
event3.addTimestampPacketNodes(timestamp3);
|
||||
Event event4(&mockCmdQ2, 0, 0, 0);
|
||||
event4.setTimestampPacketNodes(timestamp4);
|
||||
event4.addTimestampPacketNodes(timestamp4);
|
||||
Event event5(mockCmdQ.get(), 0, 0, 0);
|
||||
event5.setTimestampPacketNodes(timestamp5);
|
||||
event5.addTimestampPacketNodes(timestamp5);
|
||||
Event event6(&mockCmdQ2, 0, 0, 0);
|
||||
event6.setTimestampPacketNodes(timestamp6);
|
||||
event6.addTimestampPacketNodes(timestamp6);
|
||||
|
||||
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
|
||||
|
||||
@ -735,9 +735,9 @@ HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueu
|
||||
node2.add(tagNode2);
|
||||
|
||||
Event event0(cmdQ1.get(), 0, 0, 0);
|
||||
event0.setTimestampPacketNodes(node1);
|
||||
event0.addTimestampPacketNodes(node1);
|
||||
Event event1(cmdQ2.get(), 0, 0, 0);
|
||||
event1.setTimestampPacketNodes(node2);
|
||||
event1.addTimestampPacketNodes(node2);
|
||||
|
||||
cl_event waitlist[] = {&event0, &event1};
|
||||
|
||||
@ -791,3 +791,166 @@ TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObta
|
||||
mockCmdQ->obtainNewTimestampPacketNodes(dispatchSize, previousNodes);
|
||||
EXPECT_EQ(dispatchSize, mockCmdQ->timestampPacketContainer->peekNodes().size());
|
||||
}
|
||||
|
||||
// A marker enqueued with a non-empty wait list and an output event must not
// obtain a new node for the queue; the output event is expected to carry the
// queue's node followed by the nodes of each wait-list event, in order.
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutKernelThenInheritTimestampPacketsWithoutSubmitting) {
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);

    // Kernel enqueue so that the queue obtains its first TimestampPacket.
    MockKernelWithInternals mockKernel(*device, context.get());
    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Snapshot of the queue's nodes taken before the marker is enqueued.
    TimestampPacketContainer queueNodesBefore(device->getMemoryManager());
    queueNodesBefore.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer);

    MockTimestampPacketContainer firstWaitNodes(device->getMemoryManager(), 1);
    MockTimestampPacketContainer secondWaitNodes(device->getMemoryManager(), 1);

    Event event0(&cmdQ, 0, 0, 0);
    event0.addTimestampPacketNodes(firstWaitNodes);
    Event event1(&cmdQ, 0, 0, 0);
    event1.addTimestampPacketNodes(secondWaitNodes);

    cl_event waitlist[] = {&event0, &event1};

    cl_event clOutEvent;
    cmdQ.enqueueMarkerWithWaitList(2, waitlist, &clOutEvent);

    auto outEvent = castToObject<Event>(clOutEvent);

    // No new nodes obtained: the queue still holds the same single node.
    EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), queueNodesBefore.peekNodes().at(0));
    EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());

    auto &inheritedNodes = outEvent->getTimestampPacketNodes()->peekNodes();
    EXPECT_EQ(3u, inheritedNodes.size());
    EXPECT_EQ(queueNodesBefore.peekNodes().at(0), inheritedNodes.at(0));
    EXPECT_EQ(event0.getTimestampPacketNodes()->peekNodes().at(0), inheritedNodes.at(1));
    EXPECT_EQ(event1.getTimestampPacketNodes()->peekNodes().at(0), inheritedNodes.at(2));

    clReleaseEvent(clOutEvent);
}
|
||||
|
||||
// A marker enqueued with an empty wait list and an output event must obtain a
// new node for the queue, and the CSR command stream must contain a
// PIPE_CONTROL targeting that node's ContextEnd address with immediate data 0.
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndOutputEventWhenEnqueueingMarkerThenObtainNewPacketAndEmitPipeControlWithWrite) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);

    // Kernel enqueue so that the queue obtains its first TimestampPacket.
    MockKernelWithInternals mockKernel(*device, context.get());
    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Snapshot of the queue's nodes taken before the marker is enqueued.
    TimestampPacketContainer queueNodesBefore(device->getMemoryManager());
    queueNodesBefore.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer);

    cl_event clOutEvent;
    cmdQ.enqueueMarkerWithWaitList(0, nullptr, &clOutEvent);

    // New node obtained: the queue's single node differs from the snapshot.
    EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), queueNodesBefore.peekNodes().at(0));
    EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(device->getUltCommandStreamReceiver<FamilyType>().commandStream, 0);

    uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
    uint32_t expectedAddressLow = static_cast<uint32_t>(expectedAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedAddressHigh = static_cast<uint32_t>(expectedAddress >> 32);

    // Scan the parsed commands for a PIPE_CONTROL with the expected address and data.
    bool pipeControlFound = false;
    for (auto &parsedCmd : hwParser.cmdList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (!pipeControl) {
            continue;
        }
        if (pipeControl->getAddress() == expectedAddressLow &&
            pipeControl->getAddressHigh() == expectedAddressHigh &&
            pipeControl->getImmediateData() == 0) {
            pipeControlFound = true;
            break;
        }
    }
    EXPECT_TRUE(pipeControlFound);

    clReleaseEvent(clOutEvent);
}
|
||||
|
||||
// A marker enqueued with neither a wait list nor an output event must leave
// the queue's timestamp packet container empty.
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) {
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = true;

    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);
    cmdQ.enqueueMarkerWithWaitList(0, nullptr, nullptr);

    EXPECT_EQ(0u, cmdQ.timestampPacketContainer->peekNodes().size());
}
|
||||
|
||||
// A barrier enqueue must obtain a new node for the queue, make that node's
// allocation resident, and emit a PIPE_CONTROL targeting the node's ContextEnd
// address with immediate data 0.
HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithDataWrite) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;
    csr.storeMakeResidentAllocations = true;

    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);

    // Kernel enqueue so that the queue obtains its first TimestampPacket.
    MockKernelWithInternals mockKernel(*device, context.get());
    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Snapshot of the queue's nodes taken before the barrier is enqueued.
    TimestampPacketContainer queueNodesBefore(device->getMemoryManager());
    queueNodesBefore.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer);

    cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr);

    // New node obtained: the queue's single node differs from the snapshot.
    EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), queueNodesBefore.peekNodes().at(0));
    EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());

    EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation()));

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream, 0);

    uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
    uint32_t expectedAddressLow = static_cast<uint32_t>(expectedAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedAddressHigh = static_cast<uint32_t>(expectedAddress >> 32);

    // Scan the parsed commands for a PIPE_CONTROL with the expected address and data.
    bool pipeControlFound = false;
    for (auto &parsedCmd : hwParser.cmdList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (!pipeControl) {
            continue;
        }
        if (pipeControl->getAddress() == expectedAddressLow &&
            pipeControl->getAddressHigh() == expectedAddressHigh &&
            pipeControl->getImmediateData() == 0) {
            pipeControlFound = true;
            break;
        }
    }
    EXPECT_TRUE(pipeControlFound);
}
|
||||
|
||||
// A barrier enqueued behind an incomplete user event: once the user event is
// set to CL_COMPLETE, the queue's node allocation must be resident and the CSR
// command stream must contain a PIPE_CONTROL targeting the node's ContextEnd
// address with immediate data 0.
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithWrite) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;
    csr.storeMakeResidentAllocations = true;

    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);

    UserEvent userEvent;
    cl_event waitlist[] = {&userEvent};
    cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr);

    // Completing the user event lets the enqueued barrier proceed.
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation()));

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream, 0);

    uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
    uint32_t expectedAddressLow = static_cast<uint32_t>(expectedAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedAddressHigh = static_cast<uint32_t>(expectedAddress >> 32);

    // Scan the parsed commands for a PIPE_CONTROL with the expected address and data.
    bool pipeControlFound = false;
    for (auto &parsedCmd : hwParser.cmdList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (!pipeControl) {
            continue;
        }
        if (pipeControl->getAddress() == expectedAddressLow &&
            pipeControl->getAddressHigh() == expectedAddressHigh &&
            pipeControl->getImmediateData() == 0) {
            pipeControlFound = true;
            break;
        }
    }
    EXPECT_TRUE(pipeControlFound);
}
|
||||
|
@ -6,7 +6,6 @@
|
||||
*/
|
||||
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
#include "test.h"
|
||||
#include "unit_tests/fixtures/memory_allocator_fixture.h"
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/os_interface/linux/allocator_helper.h"
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "runtime/gen_common/hw_cmds.h"
|
||||
#include "runtime/helpers/built_ins_helper.h"
|
||||
#include "runtime/helpers/gmm_callbacks.h"
|
||||
#include "runtime/helpers/flush_stamp.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
#include "runtime/os_interface/os_interface.h"
|
||||
#include "runtime/os_interface/windows/os_context_win.h"
|
||||
#include "runtime/os_interface/windows/os_interface.h"
|
||||
|
Reference in New Issue
Block a user