mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
Pass more information to programPipelineSelect.
Change-Id: Iaabe60742269b721f8defe71306dd6e87d60d546
This commit is contained in:
@@ -526,6 +526,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
|
||||
auto mediaSamplerRequired = false;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
auto specialPipelineSelectMode = false;
|
||||
Kernel *kernel = nullptr;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
@@ -538,6 +539,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
@@ -595,6 +597,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
}
|
||||
dispatchFlags.numGrfRequired = numGrfRequired;
|
||||
dispatchFlags.specialPipelineSelectMode = specialPipelineSelectMode;
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
|
||||
@@ -218,6 +218,7 @@ class CommandStreamReceiver {
|
||||
bool stallingPipeControlOnNextFlushRequired = false;
|
||||
bool timestampPacketWriteEnabled = false;
|
||||
bool nTo1SubmissionModelEnabled = false;
|
||||
bool lastSpecialPipelineSelectMode = false;
|
||||
};
|
||||
|
||||
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump, ExecutionEnvironment &executionEnvironment);
|
||||
|
||||
@@ -121,7 +121,7 @@ inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw<GfxFamily>::add
|
||||
// CommandStreamReceiverHw<GfxFamily>::programPipelineSelect, as shown by a rendered diff hunk.
// When csrSizeRequestFlags.mediaSamplerConfigChanged is set or isPreambleSent is false, it
// delegates to PreambleHelper<GfxFamily>::programPipelineSelect and then stores
// dispatchFlags.mediaSamplerRequired in lastMediaSamplerConfig.
// NOTE(review): two PreambleHelper calls appear below because this view interleaves the
// pre-change and post-change lines without +/- markers; presumably the call passing
// dispatchFlags.mediaSamplerRequired is the removed one and the call passing the whole
// dispatchFlags is its replacement - confirm against the actual revision.
// NOTE(review): the guard does not consult specialPipelineSelectMode and
// lastSpecialPipelineSelectMode is not updated in this function - confirm that is intended.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
|
||||
if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) {
|
||||
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, dispatchFlags.mediaSamplerRequired);
|
||||
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, dispatchFlags);
|
||||
|
||||
this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired;
|
||||
}
|
||||
}
|
||||
@@ -239,6 +239,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
|
||||
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
|
||||
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
|
||||
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode;
|
||||
|
||||
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ struct DispatchFlags {
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
EventsRequest *outOfDeviceDependencies = nullptr;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
bool specialPipelineSelectMode = false;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
@@ -54,5 +55,6 @@ struct CsrSizeRequestFlags {
|
||||
bool mediaSamplerConfigChanged = false;
|
||||
bool hasSharedHandles = false;
|
||||
bool numGrfRequiredChanged = false;
|
||||
bool specialPipelineSelectModeChanged = false;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/preamble.inl"
|
||||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include "runtime/helpers/preamble.inl"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
@@ -67,7 +68,7 @@ void PreambleHelper<CNLFamily>::addPipeControlBeforeVfeCmd(LinearStream *pComman
|
||||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
|
||||
void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
|
||||
typedef typename CNLFamily::PIPELINE_SELECT PIPELINE_SELECT;
|
||||
|
||||
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
|
||||
@@ -76,7 +77,7 @@ void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStre
|
||||
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
|
||||
pCmd->setMaskBits(mask);
|
||||
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
|
||||
pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired);
|
||||
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
template struct PreambleHelper<CNLFamily>;
|
||||
|
||||
@@ -32,7 +32,7 @@ uint32_t PreambleHelper<BDWFamily>::getL3Config(const HardwareInfo &hwInfo, bool
|
||||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
|
||||
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
|
||||
typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT;
|
||||
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
|
||||
*pCmd = PIPELINE_SELECT::sInit();
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include "runtime/helpers/preamble.inl"
|
||||
|
||||
namespace OCLRT {
|
||||
@@ -27,7 +28,7 @@ uint32_t PreambleHelper<SKLFamily>::getL3Config(const HardwareInfo &hwInfo, bool
|
||||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
|
||||
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
|
||||
typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;
|
||||
|
||||
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
|
||||
@@ -36,7 +37,7 @@ void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStre
|
||||
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
|
||||
pCmd->setMaskBits(mask);
|
||||
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
|
||||
pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired);
|
||||
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include <cstdint>
|
||||
|
||||
namespace OCLRT {
|
||||
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;
|
||||
|
||||
@@ -7,14 +7,15 @@
|
||||
|
||||
#pragma once
|
||||
#include "igfxfmid.h"
|
||||
#include "stdint.h"
|
||||
#include "runtime/helpers/pipeline_select_helper.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
struct HardwareInfo;
|
||||
class Device;
|
||||
struct DispatchFlags;
|
||||
class GraphicsAllocation;
|
||||
class LinearStream;
|
||||
|
||||
@@ -26,7 +27,7 @@ struct PreambleHelper {
|
||||
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
|
||||
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags);
|
||||
static uint32_t getDefaultThreadArbitrationPolicy();
|
||||
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
|
||||
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);
|
||||
|
||||
@@ -206,6 +206,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
}
|
||||
dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
|
||||
@@ -276,15 +276,16 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
const void *getKernelArg(uint32_t argIndex) const;
|
||||
const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const;
|
||||
|
||||
bool getAllowNonUniform() { return program->getAllowNonUniform(); }
|
||||
bool isVmeKernel() { return kernelInfo.isVmeWorkload; };
|
||||
bool getAllowNonUniform() const { return program->getAllowNonUniform(); }
|
||||
bool isVmeKernel() const { return kernelInfo.isVmeWorkload; }
|
||||
bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; }
|
||||
|
||||
//residency for kernel surfaces
|
||||
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
||||
bool requiresCoherency();
|
||||
void resetSharedObjectsPatchAddresses();
|
||||
bool isUsingSharedObjArgs() { return usingSharedObjArgs; }
|
||||
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
||||
|
||||
bool hasPrintfOutput() const;
|
||||
|
||||
@@ -490,5 +491,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
std::vector<PatchInfoData> patchInfoDataList;
|
||||
std::unique_ptr<ImageTransformer> imageTransformer;
|
||||
|
||||
bool specialPipelineSelectMode = false;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -158,7 +158,7 @@ uint32_t PreambleHelper<GENX>::getL3Config(const HardwareInfo &hwInfo, bool useS
|
||||
}
|
||||
|
||||
// Explicit specialization of PreambleHelper<GENX>::programPipelineSelect with an empty body:
// nothing is written to pCommandStream and neither parameter is read.
// NOTE(review): the two signature lines below are presumably the pre-change (bool
// mediaSamplerRequired) and post-change (const DispatchFlags &) forms of one function,
// interleaved by the diff rendering - only one exists in either revision.
template <>
|
||||
void PreambleHelper<GENX>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
|
||||
void PreambleHelper<GENX>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -225,6 +225,8 @@ class MockKernel : public Kernel {
|
||||
Kernel::releaseOwnership();
|
||||
}
|
||||
|
||||
void setSpecialPipelineSelectMode(bool value) { specialPipelineSelectMode = value; }
|
||||
|
||||
uint32_t makeResidentCalls = 0;
|
||||
uint32_t getResidencyCalls = 0;
|
||||
mutable uint32_t takeOwnershipCalls = 0;
|
||||
|
||||
Reference in New Issue
Block a user