Allow Device creating multiple CSRs [3/n]

Add CSR from Device to CommandQueue

Change-Id: Iaccf3c73d25e357242837677777d0513e81f520e
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-11-22 13:57:10 +01:00
committed by sys_ocldev
parent fbf0d44fff
commit 3ad33bf1b8
72 changed files with 239 additions and 242 deletions

View File

@@ -1797,7 +1797,7 @@ cl_int CL_API_CALL clSetUserEventStatus(cl_event event,
return retVal;
}
auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getCommandStreamReceiver().obtainUniqueOwnership();
auto commandStreamReceiverOwnership = userEvent->getContext()->getDevice(0)->getEngine(0).commandStreamReceiver->obtainUniqueOwnership();
userEvent->setStatus(executionStatus);
return retVal;
}

View File

@@ -77,8 +77,11 @@ CommandQueue::CommandQueue(Context *context,
commandQueueProperties = getCmdQueueProperties<cl_command_queue_properties>(properties);
flushStamp.reset(new FlushStampTracker(true));
if (device && device->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
timestampPacketContainer = std::make_unique<TimestampPacketContainer>(device->getMemoryManager());
if (device) {
engine = &device->getEngine(engineId);
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
timestampPacketContainer = std::make_unique<TimestampPacketContainer>(device->getMemoryManager());
}
}
}
@@ -90,7 +93,7 @@ CommandQueue::~CommandQueue() {
}
if (device) {
auto storageForAllocation = device->getCommandStreamReceiver().getInternalAllocationStorage();
auto storageForAllocation = getCommandStreamReceiver().getInternalAllocationStorage();
if (commandStream) {
storageForAllocation->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
@@ -112,13 +115,17 @@ CommandQueue::~CommandQueue() {
}
}
// Returns the command stream receiver referenced by this queue's `engine`.
// `engine` is dereferenced unconditionally, so it must be non-null when this
// accessor is called.
CommandStreamReceiver &CommandQueue::getCommandStreamReceiver() const {
    auto &assignedEngine = *engine;
    return *assignedEngine.commandStreamReceiver;
}
// Reads and returns the value currently stored at the address reported by
// getHwTagAddress().
uint32_t CommandQueue::getHwTag() const {
    return *getHwTagAddress();
}
volatile uint32_t *CommandQueue::getHwTagAddress() const {
return device->getCommandStreamReceiver().getTagAddress();
return getCommandStreamReceiver().getTagAddress();
}
bool CommandQueue::isCompleted(uint32_t taskCount) const {
@@ -135,7 +142,7 @@ void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushS
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
device->getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, *device->getOsContext(), forcePowerSavingMode);
getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, *device->getOsContext(), forcePowerSavingMode);
DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
latestTaskCountWaited = taskCountToWait;
@@ -161,7 +168,7 @@ bool CommandQueue::isQueueBlocked() {
// At this point we may reset the queue's taskCount, since all commands prior to this one were aborted.
taskCount = 0;
flushStamp->setStamp(0);
taskLevel = getDevice().getCommandStreamReceiver().peekTaskLevel();
taskLevel = getCommandStreamReceiver().peekTaskLevel();
}
DebugManager.log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load());
@@ -196,9 +203,8 @@ uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
LinearStream &CommandQueue::getCS(size_t minRequiredSize) {
DEBUG_BREAK_IF(nullptr == device);
auto &commandStreamReceiver = device->getCommandStreamReceiver();
auto storageForAllocation = commandStreamReceiver.getInternalAllocationStorage();
auto memoryManager = commandStreamReceiver.getMemoryManager();
auto storageForAllocation = getCommandStreamReceiver().getInternalAllocationStorage();
auto memoryManager = getCommandStreamReceiver().getMemoryManager();
DEBUG_BREAK_IF(nullptr == memoryManager);
if (!commandStream) {
@@ -503,8 +509,6 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
MemObjOffsetArray &copyOffset,
bool readOnly,
EventBuilder &externalEventBuilder) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
EventBuilder internalEventBuilder;
EventBuilder *eventBuilder;
// check if event will be exposed externally
@@ -518,7 +522,7 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
}
//store task data in event
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, commandStreamReceiver, *this));
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, getCommandStreamReceiver(), *this));
eventBuilder->getEvent()->setCommand(std::move(cmd));
//bind output event with input events
@@ -534,11 +538,10 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
}
bool CommandQueue::setupDebugSurface(Kernel *kernel) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation();
auto debugSurface = getCommandStreamReceiver().getDebugSurfaceAllocation();
if (!debugSurface) {
debugSurface = commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
debugSurface = getCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
}
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
@@ -552,15 +555,15 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
}
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
return this->getDevice().getCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize);
return getCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize);
}
void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap) {
this->getDevice().getCommandStreamReceiver().allocateHeapMemory(heapType, minRequiredSize, indirectHeap);
getCommandStreamReceiver().allocateHeapMemory(heapType, minRequiredSize, indirectHeap);
}
void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
this->getDevice().getCommandStreamReceiver().releaseIndirectHeap(heapType);
getCommandStreamReceiver().releaseIndirectHeap(heapType);
}
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
@@ -575,7 +578,7 @@ void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo,
}
void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes) {
auto preferredPoolSize = device->getCommandStreamReceiver().getPreferredTagPoolSize();
auto preferredPoolSize = getCommandStreamReceiver().getPreferredTagPoolSize();
auto allocator = device->getMemoryManager()->obtainTimestampPacketAllocator(preferredPoolSize);

View File

@@ -7,6 +7,7 @@
#pragma once
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/engine_control.h"
#include "runtime/helpers/task_information.h"
#include "instrumentation.h"
#include <atomic>
@@ -324,6 +325,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
CommandStreamReceiver &getCommandStreamReceiver() const;
// Returns a reference obtained by dereferencing the stored `device` pointer;
// the pointer is not checked for null here.
Device &getDevice() {
    return *device;
}
// Returns a reference obtained by dereferencing the stored `context` pointer;
// the pointer is not checked for null here.
Context &getContext() {
    return *context;
}
// Returns the stored `context` pointer as-is.
Context *getContextPtr() {
    return context;
}
@@ -417,13 +419,15 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes);
Context *context;
Device *device;
Context *context = nullptr;
Device *device = nullptr;
EngineControl *engine = nullptr;
cl_command_queue_properties commandQueueProperties;
QueuePriority priority;
QueueThrottle throttle;
size_t engineId = 0;
bool perfCountersEnabled;
cl_uint perfCountersConfig;

View File

@@ -50,8 +50,8 @@ class CommandQueueHw : public CommandQueue {
}
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
device->getCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
device->getCommandStreamReceiver().enableNTo1SubmissionModel();
getCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
getCommandStreamReceiver().enableNTo1SubmissionModel();
}
}

View File

@@ -51,7 +51,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
*eventsRequest.outEvent = outEventObj;
}
auto commandStreamReceieverOwnership = device->getCommandStreamReceiver().obtainUniqueOwnership();
auto commandStreamReceieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
auto blockQueue = false;

View File

@@ -142,8 +142,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
HwTimeStamps *hwTimeStamps = nullptr;
auto &commandStreamReceiver = device->getCommandStreamReceiver();
auto commandStreamRecieverOwnership = commandStreamReceiver.obtainUniqueOwnership();
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
TimeStampData queueTimeStamp;
if (isProfilingEnabled() && event) {
@@ -191,7 +191,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
enqueueHandlerHook(commandType, multiDispatchInfo);
if (DebugManager.flags.AUBDumpSubCaptureMode.get()) {
commandStreamReceiver.activateAubSubCapture(multiDispatchInfo);
getCommandStreamReceiver().activateAubSubCapture(multiDispatchInfo);
}
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
@@ -216,12 +216,12 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
if (device->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
obtainNewTimestampPacketNodes(multiDispatchInfo.size(), previousTimestampPacketNodes);
}
if (eventBuilder.getEvent()) {
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
if (this->isProfilingEnabled()) {
@@ -263,17 +263,17 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
for (auto &dispatchInfo : multiDispatchInfo) {
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
commandStreamReceiver.getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
getCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
}
}
}
commandStreamReceiver.setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
slmUsed = multiDispatchInfo.usesSlm();
} else if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (CL_COMMAND_BARRIER == commandType) {
commandStreamReceiver.requestStallingPipeControlOnNextFlush();
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
}
if (eventBuilder.getEvent()) {
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
@@ -292,7 +292,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (parentKernel) {
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*parentKernel));
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
uint32_t taskCount = getCommandStreamReceiver().peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
parentKernel,
@@ -321,12 +321,12 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
scheduler.makeResident(commandStreamReceiver);
scheduler.makeResident(getCommandStreamReceiver());
// Update SLM usage
slmUsed |= scheduler.slmTotalSize > 0;
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getCommandStreamReceiver());
if (parentKernel->isAuxTranslationRequired()) {
blocking = true;
}
@@ -354,7 +354,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
if (parentKernel) {
commandStreamReceiver.overrideMediaVFEStateDirty(true);
getCommandStreamReceiver().overrideMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
@@ -440,7 +440,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (printfHandler) {
printfHandler->printEnqueueOutput();
}
commandStreamReceiver.waitForTaskCountAndCleanAllocationList(completionStamp.taskCount, TEMPORARY_ALLOCATION);
getCommandStreamReceiver().waitForTaskCountAndCleanAllocationList(completionStamp.taskCount, TEMPORARY_ALLOCATION);
}
}
}
@@ -506,21 +506,20 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
UNRECOVERABLE_IF(multiDispatchInfo.empty());
auto &commandStreamReceiver = device->getCommandStreamReceiver();
auto implicitFlush = false;
if (printfHandler) {
blocking = true;
printfHandler->makeResident(commandStreamReceiver);
printfHandler->makeResident(getCommandStreamReceiver());
}
if (timestampPacketContainer) {
timestampPacketContainer->makeResident(device->getCommandStreamReceiver());
previousTimestampPacketNodes->makeResident(device->getCommandStreamReceiver());
timestampPacketContainer->makeResident(getCommandStreamReceiver());
previousTimestampPacketNodes->makeResident(getCommandStreamReceiver());
}
auto requiresCoherency = false;
for (auto surface : CreateRange(surfaces, surfaceCount)) {
surface->makeResident(commandStreamReceiver);
surface->makeResident(getCommandStreamReceiver());
requiresCoherency |= surface->IsCoherent;
}
@@ -534,7 +533,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
} else {
continue;
}
kernel->makeResident(commandStreamReceiver);
kernel->makeResident(getCommandStreamReceiver());
requiresCoherency |= kernel->requiresCoherency();
mediaSamplerRequired |= kernel->isVmeKernel();
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
@@ -550,9 +549,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
if (isProfilingEnabled() && eventBuilder.getEvent()) {
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
this->getDevice().getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getGraphicsAllocation());
getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getGraphicsAllocation());
if (isPerfCountersEnabled()) {
this->getDevice().getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getGraphicsAllocation());
getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getGraphicsAllocation());
}
}
@@ -571,11 +570,11 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
}
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
getCommandStreamReceiver().requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
auto allocNeedsFlushDC = false;
if (!device->isFullRangeSvm()) {
if (std::any_of(commandStreamReceiver.getResidencyAllocations().begin(), commandStreamReceiver.getResidencyAllocations().end(), [](const auto allocation) { return allocation->flushL3Required; })) {
if (std::any_of(getCommandStreamReceiver().getResidencyAllocations().begin(), getCommandStreamReceiver().getResidencyAllocations().end(), [](const auto allocation) { return allocation->flushL3Required; })) {
allocNeedsFlushDC = true;
}
}
@@ -592,8 +591,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.implicitFlush = implicitFlush;
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || getCommandStreamReceiver().isNTo1SubmissionModelEnabled();
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
}
dispatchFlags.numGrfRequired = numGrfRequired;
@@ -604,7 +603,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
gtpinNotifyPreFlushTask(this);
}
CompletionStamp completionStamp = commandStreamReceiver.flushTask(
CompletionStamp completionStamp = getCommandStreamReceiver().flushTask(
commandStream,
commandStreamStart,
*dsh,
@@ -631,8 +630,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> printfHandler) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
//store previous virtual event as it will add dependencies to new virtual event
@@ -666,7 +663,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
*this,
nullptr));
auto cmd = std::make_unique<CommandMarker>(*this, commandStreamReceiver, commandType, cmdSize);
auto cmd = std::make_unique<CommandMarker>(*this, getCommandStreamReceiver(), commandType, cmdSize);
eventBuilder->getEvent()->setCommand(std::move(cmd));
} else {

View File

@@ -73,7 +73,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
eventWaitList,
event);
auto storageForAllocation = device->getCommandStreamReceiver().getInternalAllocationStorage();
auto storageForAllocation = getCommandStreamReceiver().getInternalAllocationStorage();
storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), TEMPORARY_ALLOCATION, taskCount);
return CL_SUCCESS;

View File

@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
if (size != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -76,7 +76,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -82,7 +82,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -222,8 +222,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
auto memoryManager = getDevice().getMemoryManager();
DEBUG_BREAK_IF(nullptr == memoryManager);
auto commandStreamReceieverOwnership = device->getCommandStreamReceiver().obtainUniqueOwnership();
auto storageWithAllocations = device->getCommandStreamReceiver().getInternalAllocationStorage();
auto commandStreamReceieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
auto storageWithAllocations = getCommandStreamReceiver().getInternalAllocationStorage();
auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, false).release();
commandStreamReceieverOwnership.unlock();

View File

@@ -96,7 +96,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
if (size != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -75,7 +75,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -76,7 +76,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getDevice().getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}

View File

@@ -15,8 +15,7 @@ namespace OCLRT {
template <typename GfxFamily>
cl_int CommandQueueHw<GfxFamily>::finish(bool dcFlush) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
commandStreamReceiver.flushBatchedSubmissions();
getCommandStreamReceiver().flushBatchedSubmissions();
//as long as queue is blocked we need to stall.
while (isQueueBlocked())
@@ -28,7 +27,7 @@ cl_int CommandQueueHw<GfxFamily>::finish(bool dcFlush) {
// Stall until HW reaches CQ taskCount
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false);
commandStreamReceiver.waitForTaskCountAndCleanAllocationList(taskCountToWaitFor, TEMPORARY_ALLOCATION);
getCommandStreamReceiver().waitForTaskCountAndCleanAllocationList(taskCountToWaitFor, TEMPORARY_ALLOCATION);
return CL_SUCCESS;
}

View File

@@ -1,32 +1,16 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
* Copyright (C) 2017-2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace OCLRT {
template <typename GfxFamily>
cl_int CommandQueueHw<GfxFamily>::flush() {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
commandStreamReceiver.flushBatchedSubmissions();
getCommandStreamReceiver().flushBatchedSubmissions();
return CL_SUCCESS;
}
} // namespace OCLRT

View File

@@ -409,7 +409,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
}
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto semaphoreSize = sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
auto atomicSize = sizeof(typename GfxFamily::MI_ATOMIC);

View File

@@ -69,7 +69,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
commandStream = &commandQueue.getCS(0);
bool dcFlush = false;
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
commandQueue.getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
@@ -164,7 +164,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
// Do not put BB_START only when returning in first Scheduler run
if (devQueueHw.getSchedulerReturnInstance() != 1) {
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, true);
commandQueue.getCommandStreamReceiver().addPipeControl(*commandStream, true);
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
auto *bbStart = (MI_BATCH_BUFFER_START *)commandStream->getSpace(sizeof(MI_BATCH_BUFFER_START));

View File

@@ -67,7 +67,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
using UniqueIH = std::unique_ptr<IndirectHeap>;
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh),
UniqueIH(ssh), *commandQueue.getDevice().getCommandStreamReceiver().getInternalAllocationStorage());
UniqueIH(ssh), *commandQueue.getCommandStreamReceiver().getInternalAllocationStorage());
if (parentKernel) {
(*blockedCommandsData)->doNotFreeISH = true;
}
@@ -81,7 +81,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
}
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
numEventsInWaitList, eventWaitList);
if (previousTimestampPacketNodes) {
@@ -183,7 +183,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
dispatchWorkarounds(commandStream, commandQueue, kernel, true);
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
}
@@ -191,7 +191,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
// Program the walker. Invokes execution so all state should already be programmed
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
}

View File

@@ -168,7 +168,7 @@ bool Context::createImpl(const cl_context_properties *properties,
memoryManager->getDeferredDeleter()->addClient();
}
if (this->sharingFunctions[SharingType::VA_SHARING]) {
device->getCommandStreamReceiver().peekKmdNotifyHelper()->initMaxPowerSavingMode();
device->initMaxPowerSavingMode();
}
}

View File

@@ -250,4 +250,10 @@ GFXCORE_FAMILY Device::getRenderCoreFamily() const {
// Reports the `sourceLevelDebuggerActive` value held in `deviceInfo`.
bool Device::isSourceLevelDebuggerActive() const {
    const bool debuggerActive = deviceInfo.sourceLevelDebuggerActive;
    return debuggerActive;
}
void Device::initMaxPowerSavingMode() {
for (auto &engine : engines) {
engine.commandStreamReceiver->peekKmdNotifyHelper()->initMaxPowerSavingMode();
}
}
} // namespace OCLRT

View File

@@ -67,13 +67,14 @@ class Device : public BaseObject<_cl_device_id> {
return engineType;
}
void initMaxPowerSavingMode();
void *getSLMWindowStartAddress();
void prepareSLMWindow();
void setForce32BitAddressing(bool value) {
deviceInfo.force32BitAddressess = value;
}
CommandStreamReceiver &getCommandStreamReceiver();
EngineControl &getEngine(size_t engineId);
volatile uint32_t *getTagAddress() const;
@@ -176,8 +177,8 @@ inline void Device::getCap(const void *&src,
retSize = size = DeviceInfoTable::Map<Param>::size;
}
inline CommandStreamReceiver &Device::getCommandStreamReceiver() {
return *engines[0].commandStreamReceiver;
inline EngineControl &Device::getEngine(size_t engineId) {
return engines[engineId];
}
inline volatile uint32_t *Device::getTagAddress() const {

View File

@@ -63,7 +63,7 @@ Event::Event(
if ((this->ctx == nullptr) && (cmdQueue != nullptr)) {
this->ctx = &cmdQueue->getContext();
if (cmdQueue->getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (cmdQueue->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
timestampPacketContainer = std::make_unique<TimestampPacketContainer>(cmdQueue->getDevice().getMemoryManager());
}
}
@@ -310,7 +310,7 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
DEBUG_BREAK_IF(this->taskLevel == Event::eventNotReady && this->executionStatus >= 0);
auto *allocationStorage = cmdQueue->getDevice().getCommandStreamReceiver().getInternalAllocationStorage();
auto *allocationStorage = cmdQueue->getCommandStreamReceiver().getInternalAllocationStorage();
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
return true;
@@ -346,7 +346,7 @@ void Event::updateExecutionStatus() {
transitionExecutionStatus(CL_COMPLETE);
executeCallbacks(CL_COMPLETE);
unblockEventsBlockedByThis(CL_COMPLETE);
auto *allocationStorage = cmdQueue->getDevice().getCommandStreamReceiver().getInternalAllocationStorage();
auto *allocationStorage = cmdQueue->getCommandStreamReceiver().getInternalAllocationStorage();
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
return;
}
@@ -452,7 +452,7 @@ void Event::submitCommand(bool abortTasks) {
if (cmdToProcess.get() != nullptr) {
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
if (timeStampNode) {
this->cmdQueue->getDevice().getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
cmdToProcess->timestamp = timeStampNode->tag;
}
if (profilingCpuPath) {
@@ -462,7 +462,7 @@ void Event::submitCommand(bool abortTasks) {
this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
}
if (perfCountersEnabled && perfCounterNode) {
this->cmdQueue->getDevice().getCommandStreamReceiver().makeResident(*perfCounterNode->getGraphicsAllocation());
this->cmdQueue->getCommandStreamReceiver().makeResident(*perfCounterNode->getGraphicsAllocation());
}
}
auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks);
@@ -479,7 +479,7 @@ void Event::submitCommand(bool abortTasks) {
if (!this->isUserEvent() && this->eventWithoutCommand) {
if (this->cmdQueue) {
TakeOwnershipWrapper<Device> deviceOwnerhsip(this->cmdQueue->getDevice());
updateTaskCount(this->cmdQueue->getDevice().getCommandStreamReceiver().peekTaskCount());
updateTaskCount(this->cmdQueue->getCommandStreamReceiver().peekTaskCount());
}
}
}
@@ -637,7 +637,7 @@ void Event::tryFlushEvent() {
if (cmdQueue && updateStatusAndCheckCompletion() == false) {
//flush the command queue only if it is not blocked event
if (taskLevel != Event::eventNotReady) {
cmdQueue->getDevice().getCommandStreamReceiver().flushBatchedSubmissions();
cmdQueue->getCommandStreamReceiver().flushBatchedSubmissions();
}
}
}
@@ -670,7 +670,7 @@ void Event::setEndTimeStamp() {
TagNode<HwTimeStamps> *Event::getHwTimeStampNode() {
if (!timeStampNode) {
auto &device = getCommandQueue()->getDevice();
auto preferredPoolSize = device.getCommandStreamReceiver().getPreferredTagPoolSize();
auto preferredPoolSize = cmdQueue->getCommandStreamReceiver().getPreferredTagPoolSize();
timeStampNode = device.getMemoryManager()->obtainEventTsAllocator(preferredPoolSize)->getTag();
}
@@ -680,7 +680,7 @@ TagNode<HwTimeStamps> *Event::getHwTimeStampNode() {
TagNode<HwPerfCounter> *Event::getHwPerfCounterNode() {
if (!perfCounterNode) {
auto &device = getCommandQueue()->getDevice();
auto preferredPoolSize = device.getCommandStreamReceiver().getPreferredTagPoolSize();
auto preferredPoolSize = cmdQueue->getCommandStreamReceiver().getPreferredTagPoolSize();
perfCounterNode = device.getMemoryManager()->obtainEventPerfCountAllocator(preferredPoolSize)->getTag();
}

View File

@@ -35,8 +35,8 @@ uint32_t UserEvent::getTaskLevel() {
uint32_t taskLevel = 0;
if (ctx != nullptr) {
Device *pDevice = ctx->getDevice(0);
auto &csr = pDevice->getCommandStreamReceiver();
taskLevel = csr.peekTaskLevel();
auto csr = pDevice->getEngine(0).commandStreamReceiver;
taskLevel = csr->peekTaskLevel();
}
return taskLevel;
}
@@ -68,9 +68,8 @@ bool VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) {
uint32_t VirtualEvent::getTaskLevel() {
uint32_t taskLevel = 0;
if (ctx != nullptr) {
Device *pDevice = ctx->getDevice(0);
auto &csr = pDevice->getCommandStreamReceiver();
if (cmdQueue != nullptr) {
auto &csr = cmdQueue->getCommandStreamReceiver();
taskLevel = csr.peekTaskLevel();
}
return taskLevel;

View File

@@ -117,7 +117,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
if (terminated) {
return completionStamp;
}
auto &commandStreamReceiver = commandQueue.getDevice().getCommandStreamReceiver();
auto &commandStreamReceiver = commandQueue.getCommandStreamReceiver();
bool executionModelKernel = kernel->isParentKernel;
auto devQueue = commandQueue.getContext().getDefaultDeviceQueue();

View File

@@ -166,7 +166,7 @@ bool Platform::initialize() {
}
}
CommandStreamReceiverType csrType = this->devices[0]->getCommandStreamReceiver().getType();
CommandStreamReceiverType csrType = this->devices[0]->getEngine(0).commandStreamReceiver->getType();
if (csrType != CommandStreamReceiverType::CSR_HW) {
executionEnvironment->initAubCenter(&hwInfo[0], this->devices[0]->getEnableLocalMemory());
}

View File

@@ -29,7 +29,7 @@ bool GlArbSyncEvent::setBaseEvent(Event &ev) {
UNRECOVERABLE_IF(ev.getContext() == nullptr);
UNRECOVERABLE_IF(ev.getCommandQueue() == nullptr);
auto cmdQueue = ev.getCommandQueue();
auto osInterface = cmdQueue->getDevice().getCommandStreamReceiver().getOSInterface();
auto osInterface = cmdQueue->getCommandStreamReceiver().getOSInterface();
UNRECOVERABLE_IF(osInterface == nullptr);
if (false == ctx->getSharing<OCLRT::GLSharingFunctions>()->glArbSyncObjectSetup(*osInterface, *glSyncInfo)) {
return false;

View File

@@ -53,7 +53,7 @@ void GlSyncEvent::updateExecutionStatus() {
}
// Reports the task level peeked from a command stream receiver that is reached
// through the first device (index 0) of this event's context.
// NOTE(review): two variants of the same lookup appear below - one through
// getCommandStreamReceiver() and one through getEngine(0).commandStreamReceiver.
// They read as the before/after lines of a single change; confirm which pair
// is the live one before relying on this listing.
uint32_t GlSyncEvent::getTaskLevel() {
auto &csr = ctx->getDevice(0)->getCommandStreamReceiver();
return csr.peekTaskLevel();
auto csr = ctx->getDevice(0)->getEngine(0).commandStreamReceiver;
return csr->peekTaskLevel();
}
} // namespace OCLRT