add dispatch hints for more flixable control on workload dispatch
Change-Id: Iecfe3031172fd108a1ef0d77d2fff8ad3cef22b3
This commit is contained in:
parent
539d688877
commit
9066192dc3
|
@ -423,6 +423,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
|
||||
uint64_t getSliceCount() const { return sliceCount; }
|
||||
|
||||
// extend dispatch hints
|
||||
uint64_t dispatchHints = 0;
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
|
|
|
@ -709,6 +709,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
|
||||
}
|
||||
|
||||
if (this->dispatchHints != 0) {
|
||||
dispatchFlags.epilogueRequired = true;
|
||||
}
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
gtpinNotifyPreFlushTask(this);
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl
|
||||
)
|
||||
|
||||
get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM)
|
||||
|
|
|
@ -53,6 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
size_t getCmdSizeForPipelineSelect() const;
|
||||
size_t getCmdSizeForComputeMode();
|
||||
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
||||
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
|
||||
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
|
@ -88,6 +89,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
|
||||
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||
virtual void initPageTableManagerRegisters(LinearStream &csr){};
|
||||
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||
|
||||
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
|
||||
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include "runtime/os_interface/os_context.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#include "command_stream_receiver_hw_ext.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -258,6 +260,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
||||
}
|
||||
|
||||
programEngineModeCommands(commandStreamCSR, dispatchFlags);
|
||||
initPageTableManagerRegisters(commandStreamCSR);
|
||||
programComputeMode(commandStreamCSR, dispatchFlags);
|
||||
programL3(commandStreamCSR, dispatchFlags, newL3Config);
|
||||
|
|
|
@ -57,12 +57,15 @@ template <typename GfxFamily>
|
|||
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {
|
||||
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||
this->programEngineModeEpliogue(csr, dispatchFlags);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const {
|
||||
return 0u;
|
||||
return this->getCmdSizeForEngineMode(dispatchFlags);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -78,6 +78,7 @@ struct DispatchFlags {
|
|||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
|
||||
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
|
||||
uint64_t engineHints = 0;
|
||||
bool blocking = false;
|
||||
bool dcFlush = false;
|
||||
bool useSLM = false;
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "core/command_stream/linear_stream.h"
|
||||
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -27,6 +27,8 @@ set(RUNTIME_SRCS_MEM_OBJ
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
|
||||
)
|
||||
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
|
||||
|
|
|
@ -182,6 +182,7 @@ class BufferHw : public Buffer {
|
|||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
|
||||
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
|
||||
void appendSurfaceStateExt(void *memory);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
MemoryProperties properties,
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
|
||||
#include "buffer_ext.inl"
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
@ -79,6 +80,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
|||
}
|
||||
|
||||
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
|
||||
appendSurfaceStateExt(memory);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -290,6 +290,7 @@ class ImageHw : public Image {
|
|||
void setMediaSurfaceRotation(void *memory) override;
|
||||
void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
|
||||
void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
|
||||
void appendSurfaceStateExt(void *memory);
|
||||
void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
|
||||
void transformImage2dArrayTo3d(void *memory) override;
|
||||
void transformImage3dTo2dArray(void *memory) override;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "runtime/mem_obj/image.h"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
#include "image_ext.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -155,6 +156,7 @@ void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, ui
|
|||
setAuxParamsForCCS(surfaceState, gmm);
|
||||
}
|
||||
appendSurfaceStateParams(surfaceState);
|
||||
appendSurfaceStateExt(surfaceState);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -1091,3 +1091,16 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
|
|||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
size_t off[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
pCmdQ->dispatchHints = 1;
|
||||
|
||||
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue