mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Grf configuration
Change-Id: I3741f53a38c6707b0c8ad82ae553ea65ae6917e4
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
@ -534,6 +534,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
}
|
||||
|
||||
auto mediaSamplerRequired = false;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
Kernel *kernel = nullptr;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
@ -544,6 +545,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
kernel->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= kernel->requiresCoherency();
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
@ -593,7 +596,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
}
|
||||
|
||||
dispatchFlags.numGrfRequired = numGrfRequired;
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "runtime/helpers/completion_stamp.h"
|
||||
#include "runtime/helpers/flat_batch_buffer_helper.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/kernel/grf_config.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
@ -179,6 +180,7 @@ class CommandStreamReceiver {
|
||||
int8_t lastMediaSamplerConfig = -1;
|
||||
PreemptionMode lastPreemptionMode = PreemptionMode::Initial;
|
||||
uint32_t latestSentStatelessMocsConfig = 0;
|
||||
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
LinearStream commandStream;
|
||||
|
||||
|
@ -50,9 +50,9 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForL3Config() const;
|
||||
size_t getCmdSizeForPipelineSelect() const;
|
||||
size_t getCmdSizeForCoherency();
|
||||
size_t getCmdSizeForComputeMode();
|
||||
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
||||
void programCoherency(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, OsContext &osContext) override;
|
||||
const HardwareInfo &peekHwInfo() const { return hwInfo; }
|
||||
|
@ -228,6 +228,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast<int8_t>(dispatchFlags.requiresCoherency);
|
||||
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
|
||||
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
|
||||
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
|
||||
|
||||
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;
|
||||
|
||||
@ -255,7 +256,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
}
|
||||
initPageTableManagerRegisters(commandStreamCSR);
|
||||
programPreemption(commandStreamCSR, device, dispatchFlags);
|
||||
programCoherency(commandStreamCSR, dispatchFlags);
|
||||
programComputeMode(commandStreamCSR, dispatchFlags);
|
||||
programL3(commandStreamCSR, dispatchFlags, newL3Config);
|
||||
programPipelineSelect(commandStreamCSR, dispatchFlags);
|
||||
programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config);
|
||||
@ -628,7 +629,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
|
||||
|
||||
size += getCmdSizeForL3Config();
|
||||
size += getCmdSizeForCoherency();
|
||||
size += getCmdSizeForComputeMode();
|
||||
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
|
||||
size += getCmdSizeForPipelineSelect();
|
||||
size += getCmdSizeForPreemption(dispatchFlags);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "runtime/memory_manager/memory_constants.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/properties_helper.h"
|
||||
#include "runtime/kernel/grf_config.h"
|
||||
#include <limits>
|
||||
|
||||
namespace OCLRT {
|
||||
@ -43,6 +44,7 @@ struct DispatchFlags {
|
||||
FlushStampTrackingObj *flushStampReference = nullptr;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
EventsRequest *outOfDeviceDependencies = nullptr;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
@ -51,5 +53,6 @@ struct CsrSizeRequestFlags {
|
||||
bool preemptionRequestChanged = false;
|
||||
bool mediaSamplerConfigChanged = false;
|
||||
bool hasSharedHandles = false;
|
||||
bool numGrfRequiredChanged = false;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
@ -16,7 +16,7 @@ typedef CNLFamily Family;
|
||||
static auto gfxCore = IGFX_GEN10_CORE;
|
||||
|
||||
template <>
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
|
||||
if (csrSizeRequestFlags.coherencyRequestChanged) {
|
||||
return sizeof(typename Family::MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
@ -24,7 +24,7 @@ size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
|
||||
}
|
||||
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
if (csrSizeRequestFlags.coherencyRequestChanged) {
|
||||
LriHelper<Family>::program(&stream, gen10HdcModeRegisterAddresss, DwordBuilder::build(4, true, !dispatchFlags.requiresCoherency));
|
||||
this->lastSentCoherencyRequest = static_cast<int8_t>(dispatchFlags.requiresCoherency);
|
||||
|
@ -15,12 +15,12 @@ typedef BDWFamily Family;
|
||||
static auto gfxCore = IGFX_GEN8_CORE;
|
||||
|
||||
template <>
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -15,12 +15,12 @@ typedef SKLFamily Family;
|
||||
static auto gfxCore = IGFX_GEN9_CORE;
|
||||
|
||||
template <>
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForCoherency() {
|
||||
size_t CommandStreamReceiverHw<Family>::getCmdSizeForComputeMode() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::programCoherency(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
void CommandStreamReceiverHw<Family>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -7,11 +7,13 @@
|
||||
set(RUNTIME_SRCS_KERNEL
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_kernel_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_reconfiguration.cpp
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL ${RUNTIME_SRCS_KERNEL})
|
||||
|
12
runtime/kernel/grf_config.h
Normal file
12
runtime/kernel/grf_config.h
Normal file
@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint> // uint32_t: keeps this header self-contained regardless of include order

// Shared constants for GRF configuration.
namespace GrfConfig {
// Default GRF count. Used as the initial value of DispatchFlags::numGrfRequired
// and CommandStreamReceiver::lastSentNumGrfRequired, and as the lower bound when
// the enqueue path takes the maximum over the kernels' NumGRFRequired values.
constexpr uint32_t DefaultGrfNumber = 128u;
} // namespace GrfConfig
|
@ -354,6 +354,8 @@ cl_int Kernel::initialize() {
|
||||
program->allocateBlockPrivateSurfaces();
|
||||
}
|
||||
|
||||
reconfigureKernel();
|
||||
|
||||
retVal = CL_SUCCESS;
|
||||
|
||||
} while (false);
|
||||
|
@ -458,6 +458,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
void resolveArgs();
|
||||
|
||||
void reconfigureKernel();
|
||||
|
||||
Program *program;
|
||||
Context *context;
|
||||
const Device &device;
|
||||
|
13
runtime/kernel/kernel_reconfiguration.cpp
Normal file
13
runtime/kernel/kernel_reconfiguration.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/kernel/kernel.h"
|
||||
|
||||
namespace OCLRT {
|
||||
// Hook called from Kernel::initialize() just before it sets retVal to CL_SUCCESS.
// This definition has an empty body, so the call has no effect here.
// NOTE(review): the CMake list references this file via ${BRANCH_DIR_SUFFIX},
// so presumably other branches provide a non-empty variant - confirm.
void Kernel::reconfigureKernel() {
|
||||
}
|
||||
} // namespace OCLRT
|
@ -239,5 +239,6 @@ struct KernelInfo {
|
||||
bool isKernelHeapSubstituted = false;
|
||||
GraphicsAllocation *kernelAllocation = nullptr;
|
||||
DebugData debugData;
|
||||
bool computeMode = false;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
@ -535,7 +535,8 @@ cl_int Program::parsePatchList(KernelInfo &kernelInfo) {
|
||||
"\n .SubgroupIndependentForwardProgressRequired", kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired,
|
||||
"\n .WorkgroupWalkOrderDim0", kernelInfo.workgroupWalkOrder[0],
|
||||
"\n .WorkgroupWalkOrderDim1", kernelInfo.workgroupWalkOrder[1],
|
||||
"\n .WorkgroupWalkOrderDim2", kernelInfo.workgroupWalkOrder[2]);
|
||||
"\n .WorkgroupWalkOrderDim2", kernelInfo.workgroupWalkOrder[2],
|
||||
"\n .NumGRFRequired", kernelInfo.patchInfo.executionEnvironment->NumGRFRequired);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_DATA_PARAMETER_STREAM:
|
||||
|
@ -42,19 +42,19 @@ struct Gen10CoherencyRequirements : public ::testing::Test {
|
||||
GEN10TEST_F(Gen10CoherencyRequirements, coherencyCmdSize) {
|
||||
auto lriSize = sizeof(MI_LOAD_REGISTER_IMM);
|
||||
overrideCoherencyRequest(false, false);
|
||||
auto retSize = csr->getCmdSizeForCoherency();
|
||||
auto retSize = csr->getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
|
||||
overrideCoherencyRequest(false, true);
|
||||
retSize = csr->getCmdSizeForCoherency();
|
||||
retSize = csr->getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
|
||||
overrideCoherencyRequest(true, true);
|
||||
retSize = csr->getCmdSizeForCoherency();
|
||||
retSize = csr->getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(lriSize, retSize);
|
||||
|
||||
overrideCoherencyRequest(true, false);
|
||||
retSize = csr->getCmdSizeForCoherency();
|
||||
retSize = csr->getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(lriSize, retSize);
|
||||
}
|
||||
|
||||
@ -68,14 +68,14 @@ GEN10TEST_F(Gen10CoherencyRequirements, coherencyCmdValues) {
|
||||
expectedCmd.setDataDword(DwordBuilder::build(4, true));
|
||||
|
||||
overrideCoherencyRequest(true, false);
|
||||
csr->programCoherency(stream, flags);
|
||||
csr->programComputeMode(stream, flags);
|
||||
EXPECT_EQ(lriSize, stream.getUsed());
|
||||
|
||||
auto cmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(stream.getCpuBase());
|
||||
EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0);
|
||||
|
||||
overrideCoherencyRequest(true, true);
|
||||
csr->programCoherency(stream, flags);
|
||||
csr->programComputeMode(stream, flags);
|
||||
EXPECT_EQ(lriSize * 2, stream.getUsed());
|
||||
|
||||
cmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream.getCpuBase(), lriSize));
|
||||
|
@ -19,14 +19,14 @@ GEN8TEST_F(Gen8CoherencyRequirements, noCoherencyProgramming) {
|
||||
LinearStream stream;
|
||||
DispatchFlags flags = {};
|
||||
|
||||
auto retSize = csr.getCmdSizeForCoherency();
|
||||
auto retSize = csr.getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
csr.programCoherency(stream, flags);
|
||||
csr.programComputeMode(stream, flags);
|
||||
EXPECT_EQ(0u, stream.getUsed());
|
||||
|
||||
flags.requiresCoherency = true;
|
||||
retSize = csr.getCmdSizeForCoherency();
|
||||
retSize = csr.getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
csr.programCoherency(stream, flags);
|
||||
csr.programComputeMode(stream, flags);
|
||||
EXPECT_EQ(0u, stream.getUsed());
|
||||
}
|
||||
|
@ -19,14 +19,14 @@ GEN9TEST_F(Gen9CoherencyRequirements, noCoherencyProgramming) {
|
||||
LinearStream stream;
|
||||
DispatchFlags flags = {};
|
||||
|
||||
auto retSize = csr.getCmdSizeForCoherency();
|
||||
auto retSize = csr.getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
csr.programCoherency(stream, flags);
|
||||
csr.programComputeMode(stream, flags);
|
||||
EXPECT_EQ(0u, stream.getUsed());
|
||||
|
||||
flags.requiresCoherency = true;
|
||||
retSize = csr.getCmdSizeForCoherency();
|
||||
retSize = csr.getCmdSizeForComputeMode();
|
||||
EXPECT_EQ(0u, retSize);
|
||||
csr.programCoherency(stream, flags);
|
||||
csr.programComputeMode(stream, flags);
|
||||
EXPECT_EQ(0u, stream.getUsed());
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/kernel/grf_config.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/scheduler/scheduler_kernel.h"
|
||||
#include "runtime/device/device.h"
|
||||
@ -118,6 +119,7 @@ class MockKernel : public Kernel {
|
||||
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
|
||||
memset(executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
|
||||
executionEnvironment->HasDeviceEnqueue = 0;
|
||||
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
info->patchInfo.executionEnvironment = executionEnvironment;
|
||||
|
||||
info->crossThreadData = new char[crossThreadSize];
|
||||
@ -243,6 +245,8 @@ class MockKernelWithInternals {
|
||||
memset(&executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
|
||||
executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
kernelHeader.SurfaceStateHeapSize = sizeof(sshLocal);
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
@ -324,6 +328,7 @@ class MockParentKernel : public Kernel {
|
||||
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
|
||||
*executionEnvironment = {};
|
||||
executionEnvironment->HasDeviceEnqueue = 1;
|
||||
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
info->patchInfo.executionEnvironment = executionEnvironment;
|
||||
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface;
|
||||
|
Reference in New Issue
Block a user