Unify setting compute mode

Change-Id: I8fd5a0cf1a121498efbbf1edb332920578d91598
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-03-19 10:29:36 +01:00
committed by sys_ocldev
parent b36fb6c0bf
commit 5de8f3ac3d
15 changed files with 71 additions and 42 deletions

View File

@@ -11,6 +11,8 @@ set(NEO_CORE_COMMAND_CONTAINER
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_plus.inl
)
add_subdirectories()
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER})

View File

@@ -45,7 +45,7 @@ struct EncodeStates {
uint32_t borderColorOffset,
const void *fnDynamicStateHeap);
static void adjustStateComputeMode(CommandContainer &container);
static void adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency);
static size_t getAdjustStateComputeModeSize();
};
@@ -167,7 +167,10 @@ struct EncodeSurfaceState {
template <typename GfxFamily>
struct EncodeComputeMode {
static void adjustComputeMode(CommandContainer &container, uint32_t numGrfRequired);
using STATE_COMPUTE_MODE = typename GfxFamily::STATE_COMPUTE_MODE;
static void adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable);
static void adjustPipelineSelect(CommandContainer &container, uint32_t numGrfRequired);
};
template <typename GfxFamily>

View File

@@ -278,10 +278,6 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, void *address, size_t s
ss->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
// Generic (non-specialized) implementation: the body is empty, so nothing is
// emitted and `container` is left untouched.
template <typename Family>
void EncodeStates<Family>::adjustStateComputeMode(CommandContainer &container) {
}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {

View File

@@ -53,8 +53,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
idd.setKernelStartPointer(offset);
idd.setKernelStartPointerHigh(0u);
}
EncodeStates<Family>::adjustStateComputeMode(container);
EncodeStates<Family>::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false);
auto threadsPerThreadGroup = dispatchInterface->getThreadsPerThreadGroupCount();
idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
@@ -276,5 +275,4 @@ template <typename GfxFamily>
size_t EncodeMiFlushDW<GfxFamily>::getMiFlushDwWaSize() {
return 0;
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
namespace NEO {
// No-op variant of adjustStateComputeMode: the body is empty, so nothing is
// written to `csr` and every argument is ignored.
template <typename Family>
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) {
}
} // namespace NEO

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
namespace NEO {
// Prepares a STATE_COMPUTE_MODE command and forwards it to
// EncodeComputeMode<Family>::adjustComputeMode.
//
// csr                     - stream handed on to adjustComputeMode.
// numGrfRequired          - passed through unchanged.
// stateComputeModePtr     - optional STATE_COMPUTE_MODE to start from; when null,
//                           Family::cmdInitStateComputeMode is used instead.
// isMultiOsContextCapable - passed through unchanged.
// requiresCoherency       - selects the force-non-coherent field value.
template <typename Family>
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;

    // Work on a local copy so the object behind stateComputeModePtr is never modified.
    STATE_COMPUTE_MODE cmd = Family::cmdInitStateComputeMode;
    if (stateComputeModePtr != nullptr) {
        cmd = *static_cast<STATE_COMPUTE_MODE *>(stateComputeModePtr);
    }

    // Coherency requested -> forcing is disabled; otherwise force GPU non-coherent.
    const FORCE_NON_COHERENT coherencyValue = requiresCoherency
                                                  ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED
                                                  : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT;
    cmd.setForceNonCoherent(coherencyValue);

    // Add the force-non-coherent mask on top of whatever mask bits are already set.
    const auto currentMaskBits = cmd.getMaskBits();
    cmd.setMaskBits(currentMaskBits | Family::stateComputeModeForceNonCoherentMask);

    EncodeComputeMode<Family>::adjustComputeMode(csr, numGrfRequired, &cmd, isMultiOsContextCapable);
}
} // namespace NEO

View File

@@ -60,7 +60,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
bool isComputeModeNeeded() const;
bool isPipelineSelectAlreadyProgrammed() const;
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
void adjustComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, void *const stateComputeMode);
void adjustThreadArbitionPolicy(void *const stateComputeMode);
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
const HardwareInfo &peekHwInfo() const;

View File

@@ -64,7 +64,7 @@ void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &com
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::adjustComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, void *const stateComputeMode) {}
void CommandStreamReceiverHw<GfxFamily>::adjustThreadArbitionPolicy(void *const stateComputeMode) {}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {

View File

@@ -5,32 +5,25 @@
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/helpers/state_compute_mode_helper.h"
namespace NEO {
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
using STATE_COMPUTE_MODE = typename GfxFamily::STATE_COMPUTE_MODE;
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;
if (isComputeModeNeeded()) {
programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, true);
auto stateComputeMode = stream.getSpaceForCmd<STATE_COMPUTE_MODE>();
*stateComputeMode = GfxFamily::cmdInitStateComputeMode;
FORCE_NON_COHERENT coherencyValue = !dispatchFlags.requiresCoherency ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED;
stateComputeMode->setForceNonCoherent(coherencyValue);
this->lastSentCoherencyRequest = static_cast<int8_t>(dispatchFlags.requiresCoherency);
stateComputeMode->setMaskBits(GfxFamily::stateComputeModeForceNonCoherentMask);
auto stateComputeMode = GfxFamily::cmdInitStateComputeMode;
adjustThreadArbitionPolicy(&stateComputeMode);
EncodeStates<GfxFamily>::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency);
if (csrSizeRequestFlags.hasSharedHandles) {
auto pc = stream.getSpaceForCmd<PIPE_CONTROL>();
*pc = GfxFamily::cmdInitPipeControl;
}
adjustComputeMode(stream, dispatchFlags, stateComputeMode);
programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, false);
}
}

View File

@@ -8,6 +8,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen11/hw_cmds_base.h"
#include "shared/source/gen11/reg_configs.h"

View File

@@ -8,32 +8,25 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl"
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/reg_configs.h"
namespace NEO {
using Family = TGLLPFamily;
// Specialization for Family: writes one STATE_COMPUTE_MODE command, with the
// force-non-coherent field set to FORCE_GPU_NON_COHERENT and the matching mask,
// into the container's command stream.
template <>
void EncodeStates<Family>::adjustStateComputeMode(CommandContainer &container) {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;

    // Start from the family's initial command and fill in the coherency fields.
    auto cmd = Family::cmdInitStateComputeMode;
    cmd.setForceNonCoherent(FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    cmd.setMaskBits(Family::stateComputeModeForceNonCoherentMask);

    // Reserve room in the command stream and copy the finished command into it.
    auto *destination = container.getCommandStream()->getSpace(sizeof(cmd));
    *reinterpret_cast<decltype(cmd) *>(destination) = cmd;
}
// Specialization for Family: returns the size in bytes of one
// STATE_COMPUTE_MODE command.
template <>
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    return sizeof(STATE_COMPUTE_MODE);
}
// Specialization for Family: reserves sizeof(STATE_COMPUTE_MODE) bytes in `csr`
// and copies the command behind stateComputeModePtr into that space.
//
// stateComputeModePtr is dereferenced unconditionally, so it must point to a
// valid STATE_COMPUTE_MODE. numGrfRequired and isMultiOsContextCapable are not
// used here.
template <>
void EncodeComputeMode<Family>::adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable) {
    auto *preparedCmd = static_cast<STATE_COMPUTE_MODE *>(stateComputeModePtr);
    auto *destination = csr.getSpace(sizeof(STATE_COMPUTE_MODE));
    *reinterpret_cast<STATE_COMPUTE_MODE *>(destination) = *preparedCmd;
}
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@@ -8,6 +8,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen8/hw_cmds_base.h"
#include "shared/source/gen8/reg_configs.h"

View File

@@ -8,6 +8,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen9/hw_cmds_base.h"
#include "shared/source/gen9/reg_configs.h"

View File

@@ -17,15 +17,15 @@ class InternalAllocationStorage;
class HeapHelper {
public:
HeapHelper(MemoryManager *memManager, InternalAllocationStorage *storageForReuse, bool isMultiOsContextCapable) : storageForReuse(storageForReuse),
memManager(memManager),
isMultiOsContextCapable(isMultiOsContextCapable) {}
HeapHelper(MemoryManager *memManager, InternalAllocationStorage *storageForReuse, bool isMultiOsContextCapable) : isMultiOsContextCapable(isMultiOsContextCapable),
storageForReuse(storageForReuse),
memManager(memManager) {}
GraphicsAllocation *getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex);
void storeHeapAllocation(GraphicsAllocation *heapAllocation);
bool isMultiOsContextCapable = false;
protected:
InternalAllocationStorage *storageForReuse = nullptr;
MemoryManager *memManager = nullptr;
bool isMultiOsContextCapable = false;
};
} // namespace NEO

View File

@@ -30,7 +30,7 @@ GEN12LPTEST_F(CommandEncoderTest, givenAdjustStateComputeModeStateComputeModeSho
auto usedSpaceBefore = cmdContainer.getCommandStream()->getUsed();
// Adjust the State Compute Mode which sets FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT
EncodeStates<FamilyType>::adjustStateComputeMode(cmdContainer);
EncodeStates<FamilyType>::adjustStateComputeMode(*cmdContainer.getCommandStream(), cmdContainer.lastSentNumGrfRequired, nullptr, false, false);
auto usedSpaceAfter = cmdContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);