mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Move Scratch Space functionality to dedicated class
Change-Id: Ic7655c4b971513961aba6823478a139ffc943466
This commit is contained in:
@ -32,6 +32,10 @@ set(RUNTIME_SRCS_COMMAND_STREAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
|
||||
@ -46,3 +50,4 @@ set(RUNTIME_SRCS_COMMAND_STREAM
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_STREAM})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_STREAM ${RUNTIME_SRCS_COMMAND_STREAM})
|
||||
add_subdirectories()
|
@ -24,7 +24,8 @@ CommandStreamReceiver *AUBCommandStreamReceiver::create(const HardwareInfo &hwIn
|
||||
// Generate the full filename
|
||||
const auto >SystemInfo = *hwInfo.pSysInfo;
|
||||
std::stringstream strfilename;
|
||||
strfilename << hwPrefix << "_" << gtSystemInfo.SliceCount << "x" << gtSystemInfo.SubSliceCount << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << baseName << ".aub";
|
||||
uint32_t subSlicesPerSlice = gtSystemInfo.SubSliceCount / gtSystemInfo.SliceCount;
|
||||
strfilename << hwPrefix << "_" << gtSystemInfo.SliceCount << "x" << subSlicesPerSlice << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << baseName << ".aub";
|
||||
|
||||
// clean-up any fileName issues because of the file system incompatibilities
|
||||
auto fileName = strfilename.str();
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/command_stream/experimental_command_buffer.h"
|
||||
#include "runtime/command_stream/preemption.h"
|
||||
#include "runtime/command_stream/scratch_space_controller.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
@ -157,11 +158,6 @@ void CommandStreamReceiver::cleanupResources() {
|
||||
waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, TEMPORARY_ALLOCATION);
|
||||
waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, REUSABLE_ALLOCATION);
|
||||
|
||||
if (scratchAllocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(scratchAllocation);
|
||||
scratchAllocation = nullptr;
|
||||
}
|
||||
|
||||
if (debugSurface) {
|
||||
getMemoryManager()->freeGraphicsMemory(debugSurface);
|
||||
debugSurface = nullptr;
|
||||
@ -217,6 +213,10 @@ void CommandStreamReceiver::setRequiredScratchSize(uint32_t newRequiredScratchSi
|
||||
}
|
||||
}
|
||||
|
||||
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {
|
||||
return scratchSpaceController->getScratchSpaceAllocation();
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::initProgrammingFlags() {
|
||||
isPreambleSent = false;
|
||||
GSBAFor32BitProgrammed = false;
|
||||
@ -310,6 +310,7 @@ void CommandStreamReceiver::allocateHeapMemory(IndirectHeap::Type heapType,
|
||||
indirectHeap = new IndirectHeap(heapMemory, requireInternalHeap);
|
||||
indirectHeap->overrideMaxSize(finalHeapSize);
|
||||
}
|
||||
scratchSpaceController->reserveHeap(heapType, indirectHeap);
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::releaseIndirectHeap(IndirectHeap::Type heapType) {
|
||||
|
@ -35,6 +35,7 @@ class LinearStream;
|
||||
class MemoryManager;
|
||||
class OsContext;
|
||||
class OSInterface;
|
||||
class ScratchSpaceController;
|
||||
class TimestampPacket;
|
||||
struct HwPerfCounter;
|
||||
struct HwTimeStamps;
|
||||
@ -116,7 +117,7 @@ class CommandStreamReceiver {
|
||||
virtual void overrideMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
|
||||
|
||||
void setRequiredScratchSize(uint32_t newRequiredScratchSize);
|
||||
GraphicsAllocation *getScratchAllocation() const { return scratchAllocation; }
|
||||
GraphicsAllocation *getScratchAllocation();
|
||||
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
|
||||
GraphicsAllocation *allocateDebugSurface(size_t size);
|
||||
|
||||
@ -180,6 +181,7 @@ class CommandStreamReceiver {
|
||||
std::unique_ptr<ExperimentalCommandBuffer> experimentalCmdBuffer;
|
||||
std::unique_ptr<InternalAllocationStorage> internalAllocationStorage;
|
||||
std::unique_ptr<KmdNotifyHelper> kmdNotifyHelper;
|
||||
std::unique_ptr<ScratchSpaceController> scratchSpaceController;
|
||||
std::unique_ptr<TagAllocator<HwTimeStamps>> profilingTimeStampAllocator;
|
||||
std::unique_ptr<TagAllocator<HwPerfCounter>> perfCounterAllocator;
|
||||
std::unique_ptr<TagAllocator<TimestampPacket>> timestampPacketAllocator;
|
||||
@ -194,7 +196,6 @@ class CommandStreamReceiver {
|
||||
volatile uint32_t *tagAddress = nullptr;
|
||||
|
||||
GraphicsAllocation *tagAllocation = nullptr;
|
||||
GraphicsAllocation *scratchAllocation = nullptr;
|
||||
GraphicsAllocation *preemptionCsrAllocation = nullptr;
|
||||
GraphicsAllocation *debugSurface = nullptr;
|
||||
OSInterface *osInterface = nullptr;
|
||||
|
@ -81,7 +81,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void handleEventsTimestampPacketTags(LinearStream &csr, DispatchFlags &dispatchFlags, Device ¤tDevice);
|
||||
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
virtual void initPageTableManagerRegisters(LinearStream &csr){};
|
||||
void createScratchSpaceAllocation(size_t requiredScratchSizeInBytes);
|
||||
|
||||
void addPipeControlWA(LinearStream &commandStream, bool flushDC);
|
||||
void addDcFlushToPipeControl(typename GfxFamily::PIPE_CONTROL *pCmd, bool flushDC);
|
||||
@ -90,6 +89,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getSshHeapSize();
|
||||
|
||||
uint64_t getScratchPatchAddress();
|
||||
void createScratchSpaceController(const HardwareInfo &hwInfoIn);
|
||||
|
||||
static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate);
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||
#include "runtime/command_stream/experimental_command_buffer.h"
|
||||
#include "runtime/command_stream/linear_stream.h"
|
||||
#include "runtime/command_stream/scratch_space_controller_base.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/gtpin/gtpin_notify.h"
|
||||
@ -51,6 +52,7 @@ CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(const HardwareInfo &
|
||||
if (DebugManager.flags.EnableTimestampPacket.get() != -1) {
|
||||
timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get();
|
||||
}
|
||||
createScratchSpaceController(hwInfoIn);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -237,22 +239,21 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
|
||||
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode;
|
||||
|
||||
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;
|
||||
|
||||
auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations();
|
||||
|
||||
bool stateBaseAddressDirty = false;
|
||||
|
||||
if (requiredScratchSize && (!scratchAllocation || scratchAllocation->getUnderlyingBufferSize() < requiredScratchSizeInBytes)) {
|
||||
if (scratchAllocation) {
|
||||
scratchAllocation->updateTaskCount(this->taskCount, this->deviceIndex);
|
||||
internalAllocationStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
|
||||
}
|
||||
createScratchSpaceAllocation(requiredScratchSizeInBytes);
|
||||
overrideMediaVFEStateDirty(true);
|
||||
if (is64bit && !force32BitAllocations) {
|
||||
stateBaseAddressDirty = true;
|
||||
bool checkVfeStateDirty = false;
|
||||
if (requiredScratchSize) {
|
||||
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
|
||||
requiredScratchSize,
|
||||
this->taskCount,
|
||||
this->deviceIndex,
|
||||
stateBaseAddressDirty,
|
||||
checkVfeStateDirty);
|
||||
if (checkVfeStateDirty) {
|
||||
overrideMediaVFEStateDirty(true);
|
||||
}
|
||||
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
|
||||
}
|
||||
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
@ -308,8 +309,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
|
||||
uint64_t newGSHbase = 0;
|
||||
GSBAFor32BitProgrammed = false;
|
||||
if (is64bit && scratchAllocation && !force32BitAllocations) {
|
||||
newGSHbase = (uint64_t)scratchAllocation->getUnderlyingBuffer() - PreambleHelper<GfxFamily>::getScratchSpaceOffsetFor64bit();
|
||||
if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) {
|
||||
newGSHbase = scratchSpaceController->calculateNewGSH();
|
||||
} else if (is64bit && force32BitAllocations && dispatchFlags.GSBA32BitRequired) {
|
||||
newGSHbase = getMemoryManager()->allocator32Bit->getBase();
|
||||
GSBAFor32BitProgrammed = true;
|
||||
@ -381,9 +382,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
|
||||
this->makeResident(*tagAllocation);
|
||||
|
||||
if (requiredScratchSize)
|
||||
makeResident(*scratchAllocation);
|
||||
|
||||
if (preemptionCsrAllocation)
|
||||
makeResident(*preemptionCsrAllocation);
|
||||
|
||||
@ -608,22 +606,6 @@ void CommandStreamReceiverHw<GfxFamily>::addPipeControl(LinearStream &commandStr
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t CommandStreamReceiverHw<GfxFamily>::getScratchPatchAddress() {
|
||||
//for 32 bit scratch space pointer is being programmed in Media VFE State and is relative to 0 as General State Base Address
|
||||
//for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit"
|
||||
// and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state
|
||||
|
||||
uint64_t scratchAddress = 0;
|
||||
if (requiredScratchSize) {
|
||||
scratchAddress = scratchAllocation->getGpuAddressToPatch();
|
||||
if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) {
|
||||
//this is to avoid scractch allocation offset "0"
|
||||
scratchAddress = PreambleHelper<GfxFamily>::getScratchSpaceOffsetFor64bit();
|
||||
}
|
||||
}
|
||||
return scratchAddress;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
size_t size = getRequiredCmdStreamSize(dispatchFlags, device);
|
||||
@ -821,7 +803,12 @@ void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearS
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceAllocation(size_t requiredScratchSizeInBytes) {
|
||||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryInPreferredPool(AllocationFlags(true), 0, nullptr, requiredScratchSizeInBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE);
|
||||
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController(const HardwareInfo &hwInfoIn) {
|
||||
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(hwInfoIn, executionEnvironment, *internalAllocationStorage.get());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t CommandStreamReceiverHw<GfxFamily>::getScratchPatchAddress() {
|
||||
return scratchSpaceController->getScratchPatchAddress();
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
32
runtime/command_stream/scratch_space_controller.cpp
Normal file
32
runtime/command_stream/scratch_space_controller.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_stream/scratch_space_controller.h"
|
||||
#include "runtime/execution_environment/execution_environment.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/memory_manager/internal_allocation_storage.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
ScratchSpaceController::ScratchSpaceController(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage)
|
||||
: hwInfo(info), executionEnvironment(environment), csrAllocationStorage(allocationStorage) {
|
||||
auto &hwHelper = HwHelper::get(info.pPlatform->eRenderCoreFamily);
|
||||
computeUnitsUsedForScratch = hwHelper.getComputeUnitsUsedForScratch(&hwInfo);
|
||||
}
|
||||
|
||||
ScratchSpaceController::~ScratchSpaceController() {
|
||||
if (scratchAllocation) {
|
||||
getMemoryManager()->freeGraphicsMemory(scratchAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
MemoryManager *ScratchSpaceController::getMemoryManager() const {
|
||||
UNRECOVERABLE_IF(executionEnvironment.memoryManager.get() == nullptr);
|
||||
return executionEnvironment.memoryManager.get();
|
||||
}
|
||||
} // namespace OCLRT
|
52
runtime/command_stream/scratch_space_controller.h
Normal file
52
runtime/command_stream/scratch_space_controller.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class Device;
|
||||
class ExecutionEnvironment;
|
||||
class GraphicsAllocation;
|
||||
class InternalAllocationStorage;
|
||||
class MemoryManager;
|
||||
struct HardwareInfo;
|
||||
|
||||
class ScratchSpaceController {
|
||||
public:
|
||||
ScratchSpaceController(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
|
||||
virtual ~ScratchSpaceController();
|
||||
|
||||
GraphicsAllocation *getScratchSpaceAllocation() {
|
||||
return scratchAllocation;
|
||||
}
|
||||
virtual void setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t currentTaskCount,
|
||||
uint32_t deviceIdx,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) = 0;
|
||||
virtual uint64_t calculateNewGSH() = 0;
|
||||
virtual uint64_t getScratchPatchAddress() = 0;
|
||||
|
||||
virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0;
|
||||
|
||||
protected:
|
||||
MemoryManager *getMemoryManager() const;
|
||||
|
||||
const HardwareInfo &hwInfo;
|
||||
ExecutionEnvironment &executionEnvironment;
|
||||
GraphicsAllocation *scratchAllocation = nullptr;
|
||||
InternalAllocationStorage &csrAllocationStorage;
|
||||
size_t scratchSizeBytes = 0;
|
||||
bool force32BitAllocation = false;
|
||||
uint32_t computeUnitsUsedForScratch = 0;
|
||||
};
|
||||
} // namespace OCLRT
|
74
runtime/command_stream/scratch_space_controller_base.cpp
Normal file
74
runtime/command_stream/scratch_space_controller_base.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/command_stream/scratch_space_controller_base.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/memory_manager/memory_constants.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/memory_manager/internal_allocation_storage.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
ScratchSpaceControllerBase::ScratchSpaceControllerBase(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage)
|
||||
: ScratchSpaceController(info, environment, allocationStorage) {
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t currentTaskCount,
|
||||
uint32_t deviceIdx,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) {
|
||||
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch;
|
||||
if (requiredScratchSizeInBytes && (!scratchAllocation || scratchSizeBytes < requiredScratchSizeInBytes)) {
|
||||
if (scratchAllocation) {
|
||||
scratchAllocation->updateTaskCount(currentTaskCount, deviceIdx);
|
||||
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
|
||||
}
|
||||
scratchSizeBytes = requiredScratchSizeInBytes;
|
||||
createScratchSpaceAllocation();
|
||||
vfeStateDirty = true;
|
||||
force32BitAllocation = getMemoryManager()->peekForce32BitAllocations();
|
||||
if (is64bit && !force32BitAllocation) {
|
||||
stateBaseAddressDirty = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerBase::createScratchSpaceAllocation() {
|
||||
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryInPreferredPool(AllocationFlags(true), 0, nullptr, scratchSizeBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE);
|
||||
UNRECOVERABLE_IF(scratchAllocation == nullptr);
|
||||
}
|
||||
|
||||
uint64_t ScratchSpaceControllerBase::calculateNewGSH() {
|
||||
auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily);
|
||||
auto scratchSpaceOffsetFor64bit = hwHelper.getScratchSpaceOffsetFor64bit();
|
||||
return reinterpret_cast<uint64_t>(scratchAllocation->getUnderlyingBuffer()) - scratchSpaceOffsetFor64bit;
|
||||
}
|
||||
uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() {
|
||||
//for 32 bit scratch space pointer is being programmed in Media VFE State and is relative to 0 as General State Base Address
|
||||
//for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit"
|
||||
// and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state
|
||||
uint64_t scratchAddress = 0;
|
||||
if (scratchAllocation) {
|
||||
scratchAddress = scratchAllocation->getGpuAddressToPatch();
|
||||
if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) {
|
||||
auto &hwHelper = HwHelper::get(hwInfo.pPlatform->eRenderCoreFamily);
|
||||
auto scratchSpaceOffsetFor64bit = hwHelper.getScratchSpaceOffsetFor64bit();
|
||||
//this is to avoid scractch allocation offset "0"
|
||||
scratchAddress = scratchSpaceOffsetFor64bit;
|
||||
}
|
||||
}
|
||||
return scratchAddress;
|
||||
}
|
||||
|
||||
void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) {
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
31
runtime/command_stream/scratch_space_controller_base.h
Normal file
31
runtime/command_stream/scratch_space_controller_base.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_stream/scratch_space_controller.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class ScratchSpaceControllerBase : public ScratchSpaceController {
|
||||
public:
|
||||
ScratchSpaceControllerBase(const HardwareInfo &info, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage);
|
||||
|
||||
void setRequiredScratchSpace(void *sshBaseAddress,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t currentTaskCount,
|
||||
uint32_t deviceIdx,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty) override;
|
||||
uint64_t calculateNewGSH() override;
|
||||
uint64_t getScratchPatchAddress() override;
|
||||
|
||||
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override;
|
||||
|
||||
protected:
|
||||
void createScratchSpaceAllocation();
|
||||
};
|
||||
} // namespace OCLRT
|
@ -66,4 +66,5 @@ const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI
|
||||
const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit();
|
||||
const Family::PIPE_CONTROL Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit();
|
||||
const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit();
|
||||
const Family::RENDER_SURFACE_STATE Family::cmdRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit();
|
||||
} // namespace OCLRT
|
||||
|
@ -34,6 +34,7 @@ struct CNLFamily : public GEN10 {
|
||||
static const MI_BATCH_BUFFER_START cmdInitBatchBufferStart;
|
||||
static const PIPE_CONTROL cmdInitPipeControl;
|
||||
static const MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
|
||||
static const RENDER_SURFACE_STATE cmdRenderSurfaceState;
|
||||
|
||||
static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) {
|
||||
return cmdSetBaseFamily == IGFX_GEN8_CORE;
|
||||
|
@ -77,7 +77,7 @@ void CNL_2x5x8::setupHardwareInfo(GT_SYSTEM_INFO *gtSysInfo, FeatureTable *featu
|
||||
gtSysInfo->EUCount = 39;
|
||||
gtSysInfo->ThreadCount = 39 * CNL::threadsPerEu;
|
||||
gtSysInfo->SliceCount = 2;
|
||||
gtSysInfo->SubSliceCount = 5;
|
||||
gtSysInfo->SubSliceCount = 10;
|
||||
gtSysInfo->L3CacheSizeInKb = 1536;
|
||||
gtSysInfo->L3BankCount = 6;
|
||||
gtSysInfo->MaxFillRate = 16;
|
||||
@ -105,7 +105,7 @@ void CNL_2x4x8::setupHardwareInfo(GT_SYSTEM_INFO *gtSysInfo, FeatureTable *featu
|
||||
gtSysInfo->EUCount = 31;
|
||||
gtSysInfo->ThreadCount = 31 * CNL::threadsPerEu;
|
||||
gtSysInfo->SliceCount = 2;
|
||||
gtSysInfo->SubSliceCount = 4;
|
||||
gtSysInfo->SubSliceCount = 8;
|
||||
gtSysInfo->L3CacheSizeInKb = 1536;
|
||||
gtSysInfo->L3BankCount = 6;
|
||||
gtSysInfo->MaxFillRate = 16;
|
||||
@ -189,7 +189,7 @@ void CNL_4x9x8::setupHardwareInfo(GT_SYSTEM_INFO *gtSysInfo, FeatureTable *featu
|
||||
gtSysInfo->EUCount = 71;
|
||||
gtSysInfo->ThreadCount = 71 * CNL::threadsPerEu;
|
||||
gtSysInfo->SliceCount = 4;
|
||||
gtSysInfo->SubSliceCount = 9;
|
||||
gtSysInfo->SubSliceCount = 36;
|
||||
gtSysInfo->L3CacheSizeInKb = 1536;
|
||||
gtSysInfo->L3BankCount = 6;
|
||||
gtSysInfo->MaxFillRate = 16;
|
||||
|
@ -59,4 +59,5 @@ const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI
|
||||
const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit();
|
||||
const Family::PIPE_CONTROL Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit();
|
||||
const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit();
|
||||
const Family::RENDER_SURFACE_STATE Family::cmdRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit();
|
||||
} // namespace OCLRT
|
||||
|
@ -33,6 +33,7 @@ struct BDWFamily : public GEN8 {
|
||||
static const MI_BATCH_BUFFER_START cmdInitBatchBufferStart;
|
||||
static const PIPE_CONTROL cmdInitPipeControl;
|
||||
static const MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
|
||||
static const RENDER_SURFACE_STATE cmdRenderSurfaceState;
|
||||
|
||||
static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) {
|
||||
return cmdSetBaseFamily == IGFX_GEN8_CORE;
|
||||
|
@ -58,4 +58,5 @@ const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI
|
||||
const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit();
|
||||
const Family::PIPE_CONTROL Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit();
|
||||
const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit();
|
||||
const Family::RENDER_SURFACE_STATE Family::cmdRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit();
|
||||
} // namespace OCLRT
|
||||
|
@ -34,6 +34,7 @@ struct SKLFamily : public GEN9 {
|
||||
static const MI_BATCH_BUFFER_START cmdInitBatchBufferStart;
|
||||
static const PIPE_CONTROL cmdInitPipeControl;
|
||||
static const MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
|
||||
static const RENDER_SURFACE_STATE cmdRenderSurfaceState;
|
||||
|
||||
static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) {
|
||||
return cmdSetBaseFamily == IGFX_GEN8_CORE;
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/gen_common/aub_mapper.h"
|
||||
#include "runtime/gen_common/hw_cmds.h"
|
||||
#include "runtime/command_stream/linear_stream.h"
|
||||
@ -15,6 +16,8 @@
|
||||
#include <type_traits>
|
||||
|
||||
namespace OCLRT {
|
||||
class ExecutionEnvironment;
|
||||
class GraphicsAllocation;
|
||||
struct HardwareCapabilities;
|
||||
|
||||
class HwHelper {
|
||||
@ -37,6 +40,18 @@ class HwHelper {
|
||||
virtual const AubMemDump::LrcaHelper &getCsTraits(EngineInstanceT engineInstance) const = 0;
|
||||
virtual bool supportsYTiling() const = 0;
|
||||
virtual bool timestampPacketWriteSupported() const = 0;
|
||||
virtual size_t getRenderSurfaceStateSize() const = 0;
|
||||
virtual void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) = 0;
|
||||
virtual size_t getScratchSpaceOffsetFor64bit() = 0;
|
||||
|
||||
protected:
|
||||
HwHelper() = default;
|
||||
@ -72,6 +87,11 @@ class HwHelperHw : public HwHelper {
|
||||
return sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
}
|
||||
|
||||
size_t getRenderSurfaceStateSize() const override {
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
return sizeof(RENDER_SURFACE_STATE);
|
||||
}
|
||||
|
||||
const AubMemDump::LrcaHelper &getCsTraits(EngineInstanceT engineInstance) const override;
|
||||
|
||||
size_t getMaxBarrierRegisterPerSlice() const override;
|
||||
@ -98,6 +118,19 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) override;
|
||||
|
||||
size_t getScratchSpaceOffsetFor64bit() override;
|
||||
|
||||
protected:
|
||||
HwHelperHw() = default;
|
||||
};
|
||||
@ -149,4 +182,13 @@ struct PipeControlHelper {
|
||||
}
|
||||
};
|
||||
|
||||
union SURFACE_STATE_BUFFER_LENGTH {
|
||||
uint32_t Length;
|
||||
struct SurfaceState {
|
||||
uint32_t Width : BITFIELD_RANGE(0, 6);
|
||||
uint32_t Height : BITFIELD_RANGE(7, 20);
|
||||
uint32_t Depth : BITFIELD_RANGE(21, 31);
|
||||
} SurfaceState;
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
|
@ -6,11 +6,17 @@
|
||||
*/
|
||||
|
||||
#include "runtime/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "runtime/execution_environment/execution_environment.h"
|
||||
#include "runtime/gmm_helper/gmm.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/memory_manager/memory_constants.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename Family>
|
||||
void HwHelperHw<Family>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
|
||||
coherencyFlag = true;
|
||||
@ -84,4 +90,76 @@ template <typename Family>
|
||||
bool HwHelperHw<Family>::timestampPacketWriteSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) {
|
||||
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
|
||||
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
auto gmmHelper = executionEnvironment.getGmmHelper();
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);
|
||||
*surfaceState = Family::cmdRenderSurfaceState;
|
||||
auto surfaceSize = alignUp(bufferSize, 4);
|
||||
|
||||
SURFACE_STATE_BUFFER_LENGTH Length = {0};
|
||||
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
|
||||
|
||||
surfaceState->setWidth(Length.SurfaceState.Width + 1);
|
||||
surfaceState->setHeight(Length.SurfaceState.Height + 1);
|
||||
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
|
||||
if (pitch) {
|
||||
surfaceState->setSurfacePitch(pitch);
|
||||
}
|
||||
|
||||
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
|
||||
auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
|
||||
bufferStateAddress += offset;
|
||||
|
||||
auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;
|
||||
|
||||
surfaceState->setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));
|
||||
|
||||
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
|
||||
surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
|
||||
surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
|
||||
|
||||
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
|
||||
surfaceState->setVerticalLineStride(0);
|
||||
surfaceState->setVerticalLineStrideOffset(0);
|
||||
if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
|
||||
((flags & CL_MEM_READ_ONLY)) != 0) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
|
||||
} else {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
}
|
||||
|
||||
surfaceState->setSurfaceBaseAddress(bufferStateAddress);
|
||||
|
||||
Gmm *gmm = gfxAlloc ? gfxAlloc->gmm : nullptr;
|
||||
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
|
||||
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAlloc->getAllocationType()) {
|
||||
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
|
||||
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
|
||||
} else {
|
||||
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
|
||||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t HwHelperHw<Family>::getScratchSpaceOffsetFor64bit() {
|
||||
return 4096;
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
@ -24,8 +24,6 @@ struct PreambleHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
|
||||
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags);
|
||||
static uint32_t getDefaultThreadArbitrationPolicy();
|
||||
|
@ -380,12 +380,7 @@ struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUnifo
|
||||
|
||||
kernel->setArgSvm(1, sizeUserMemory, destMemory);
|
||||
|
||||
outBuffer = csr->getMemoryManager()->allocateGraphicsMemory(sizeUserMemory, destMemory);
|
||||
csr->makeResidentHostPtrAllocation(outBuffer);
|
||||
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(outBuffer), TEMPORARY_ALLOCATION);
|
||||
ASSERT_NE(nullptr, outBuffer);
|
||||
outBuffer->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||
outBuffer->setMemObjectsAllocationWithWritableFlags(true);
|
||||
outBuffer = createHostPtrAllocationFromSvmPtr(destMemory, sizeUserMemory);
|
||||
}
|
||||
|
||||
void initializeExpectedMemory(size_t globalX, size_t globalY, size_t globalZ) {
|
||||
|
@ -22,7 +22,8 @@ using OCLRT::folderAUB;
|
||||
std::string getAubFileName(const OCLRT::Device *pDevice, const std::string baseName) {
|
||||
const auto pGtSystemInfo = pDevice->getHardwareInfo().pSysInfo;
|
||||
std::stringstream strfilename;
|
||||
strfilename << pDevice->getProductAbbrev() << "_" << pGtSystemInfo->SliceCount << "x" << pGtSystemInfo->SubSliceCount << "x" << pGtSystemInfo->MaxEuPerSubSlice << "_" << baseName;
|
||||
uint32_t subSlicesPerSlice = pGtSystemInfo->SubSliceCount / pGtSystemInfo->SliceCount;
|
||||
strfilename << pDevice->getProductAbbrev() << "_" << pGtSystemInfo->SliceCount << "x" << subSlicesPerSlice << "x" << pGtSystemInfo->MaxEuPerSubSlice << "_" << baseName;
|
||||
|
||||
return strfilename.str();
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
target_sources(igdrcl_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_parent_kernel_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/fixture_tests.cpp
|
||||
|
24
unit_tests/aub_tests/fixtures/aub_fixture.cpp
Normal file
24
unit_tests/aub_tests/fixtures/aub_fixture.cpp
Normal file
@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/memory_manager/internal_allocation_storage.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "unit_tests/aub_tests/fixtures/aub_fixture.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
GraphicsAllocation *AUBFixture::createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size) {
|
||||
GraphicsAllocation *allocation = csr->getMemoryManager()->allocateGraphicsMemory(size, svmPtr);
|
||||
csr->makeResidentHostPtrAllocation(allocation);
|
||||
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), TEMPORARY_ALLOCATION);
|
||||
allocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||
allocation->setMemObjectsAllocationWithWritableFlags(true);
|
||||
return allocation;
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
@ -53,6 +53,8 @@ class AUBFixture : public CommandQueueHwFixture {
|
||||
CommandQueueHwFixture::TearDown();
|
||||
}
|
||||
|
||||
GraphicsAllocation *createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size);
|
||||
|
||||
template <typename FamilyType>
|
||||
AUBCommandStreamReceiverHw<FamilyType> *getAubCsr() {
|
||||
AUBCommandStreamReceiverHw<FamilyType> *aubCsr = nullptr;
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "reg_configs_common.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/memory_manager/allocations_list.h"
|
||||
#include "unit_tests/command_queue/enqueue_fixture.h"
|
||||
#include "unit_tests/fixtures/hello_world_fixture.h"
|
||||
@ -325,8 +326,8 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
|
||||
EXPECT_NE(0u, cmd->getScratchSpaceBasePointer());
|
||||
EXPECT_EQ(0u, GSHaddress);
|
||||
} else {
|
||||
EXPECT_EQ(PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), cmd->getScratchSpaceBasePointer());
|
||||
EXPECT_EQ(GSHaddress + PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), (uintptr_t)graphicsAllocation->getUnderlyingBuffer());
|
||||
EXPECT_EQ(HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit(), cmd->getScratchSpaceBasePointer());
|
||||
EXPECT_EQ(GSHaddress + HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit(), (uintptr_t)graphicsAllocation->getUnderlyingBuffer());
|
||||
}
|
||||
|
||||
auto allocationSize = scratchSize * pDevice->getDeviceInfo().computeUnitsUsedForScratch;
|
||||
@ -378,7 +379,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
|
||||
auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase;
|
||||
auto GSHaddress2 = sba2->getGeneralStateBaseAddress();
|
||||
EXPECT_NE(0u, GSHaddress2);
|
||||
EXPECT_EQ(PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), cmd2->getScratchSpaceBasePointer());
|
||||
EXPECT_EQ(HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit(), cmd2->getScratchSpaceBasePointer());
|
||||
EXPECT_NE(GSHaddress2, GSHaddress);
|
||||
}
|
||||
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize);
|
||||
@ -399,7 +400,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
|
||||
if (is32bit) {
|
||||
EXPECT_EQ(0u, GSBaddress);
|
||||
} else if (is64bit) {
|
||||
EXPECT_EQ((uintptr_t)graphicsAllocation2->getUnderlyingBuffer(), GSBaddress + PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit());
|
||||
EXPECT_EQ((uintptr_t)graphicsAllocation2->getUnderlyingBuffer(), GSBaddress + HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit());
|
||||
}
|
||||
|
||||
EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "reg_configs_common.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/memory_manager/internal_allocation_storage.h"
|
||||
#include "runtime/os_interface/os_context.h"
|
||||
#include "test.h"
|
||||
@ -498,7 +499,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu
|
||||
uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh();
|
||||
|
||||
if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) {
|
||||
uint64_t expectedAddress = PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit();
|
||||
uint64_t expectedAddress = HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit();
|
||||
EXPECT_EQ(expectedAddress, scratchBaseLowPart);
|
||||
EXPECT_EQ(0u, scratchBaseHighPart);
|
||||
} else {
|
||||
@ -510,7 +511,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu
|
||||
EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress);
|
||||
} else {
|
||||
if (is64bit) {
|
||||
EXPECT_EQ(graphicsAddress - PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), GSHaddress);
|
||||
EXPECT_EQ(graphicsAddress - HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit(), GSHaddress);
|
||||
} else {
|
||||
EXPECT_EQ(0u, GSHaddress);
|
||||
}
|
||||
@ -608,7 +609,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNDRangeKer
|
||||
uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh();
|
||||
|
||||
if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) {
|
||||
lowPartGraphicsAddress = PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit();
|
||||
lowPartGraphicsAddress = HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit();
|
||||
highPartGraphicsAddress = 0u;
|
||||
}
|
||||
|
||||
@ -619,7 +620,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNDRangeKer
|
||||
EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress);
|
||||
} else {
|
||||
if (is64bit) {
|
||||
EXPECT_EQ(graphicsAddress - PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), GSHaddress);
|
||||
EXPECT_EQ(graphicsAddress - HwHelperHw<FamilyType>::get().getScratchSpaceOffsetFor64bit(), GSHaddress);
|
||||
} else {
|
||||
EXPECT_EQ(0u, GSHaddress);
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/command_stream/linear_stream.h"
|
||||
#include "runtime/command_stream/scratch_space_controller.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
@ -210,3 +211,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverHwTest, WhenCommandStreamReceiv
|
||||
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
|
||||
EXPECT_EQ(64 * KB, commandStreamReceiver.defaultSshSize);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAllocationIsNotCreated) {
|
||||
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*platformDevices[0], *pDevice->executionEnvironment);
|
||||
auto scratchController = commandStreamReceiver->scratchSpaceController.get();
|
||||
|
||||
bool stateBaseAddressDirty = false;
|
||||
bool cfeStateDirty = false;
|
||||
scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, stateBaseAddressDirty, cfeStateDirty);
|
||||
EXPECT_FALSE(cfeStateDirty);
|
||||
EXPECT_FALSE(stateBaseAddressDirty);
|
||||
EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation());
|
||||
}
|
||||
|
@ -239,7 +239,7 @@ class SimpleKernelFixture : public ProgramFixture {
|
||||
|
||||
size_t kernelsCount;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
std::unique_ptr<Kernel> kernels[6] = {};
|
||||
std::unique_ptr<Kernel> kernels[8] = {};
|
||||
uint32_t kernelIds = 0;
|
||||
};
|
||||
|
||||
|
@ -26,7 +26,7 @@ struct UltCommandStreamReceiverTest
|
||||
DeviceFixture::SetUp();
|
||||
HardwareParse::SetUp();
|
||||
|
||||
size_t sizeStream = 256;
|
||||
size_t sizeStream = 512;
|
||||
size_t alignmentStream = 0x1000;
|
||||
cmdBuffer = alignedMalloc(sizeStream, alignmentStream);
|
||||
dshBuffer = alignedMalloc(sizeStream, alignmentStream);
|
||||
|
@ -30,25 +30,25 @@ TEST(CnlHwInfoConfig, givenHwInfoConfigStringThenAfterSetupResultingHwInfoIsCorr
|
||||
strConfig = "2x4x8";
|
||||
hardwareInfoSetup[productFamily](&gInfo, &fTable, false, strConfig);
|
||||
EXPECT_EQ(gInfo.SliceCount, 2u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 4u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 8u);
|
||||
EXPECT_EQ(gInfo.EUCount, 31u);
|
||||
|
||||
strConfig = "2x5x8";
|
||||
hardwareInfoSetup[productFamily](&gInfo, &fTable, false, strConfig);
|
||||
EXPECT_EQ(gInfo.SliceCount, 2u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 5u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 10u);
|
||||
EXPECT_EQ(gInfo.EUCount, 39u);
|
||||
|
||||
strConfig = "4x9x8";
|
||||
hardwareInfoSetup[productFamily](&gInfo, &fTable, false, strConfig);
|
||||
EXPECT_EQ(gInfo.SliceCount, 4u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 9u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 36u);
|
||||
EXPECT_EQ(gInfo.EUCount, 71u);
|
||||
|
||||
strConfig = "default";
|
||||
hardwareInfoSetup[productFamily](&gInfo, &fTable, false, strConfig);
|
||||
EXPECT_EQ(gInfo.SliceCount, 2u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 5u);
|
||||
EXPECT_EQ(gInfo.SubSliceCount, 10u);
|
||||
EXPECT_EQ(gInfo.EUCount, 39u);
|
||||
|
||||
strConfig = "erroneous";
|
||||
|
@ -5,14 +5,27 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/gmm_helper/gmm.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/gmm_helper/resource_info.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/helpers/hw_helper_tests.h"
|
||||
#include "unit_tests/helpers/unit_test_helper.h"
|
||||
|
||||
void HwHelperTest::SetUp() {
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
void HwHelperFixture::SetUp() {
|
||||
DeviceFixture::SetUp();
|
||||
}
|
||||
void HwHelperTest::TearDown() {
|
||||
void HwHelperFixture::TearDown() {
|
||||
DeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
TEST_F(HwHelperTest, getReturnsValidHwHelperHw) {
|
||||
@ -212,3 +225,336 @@ TEST(HwInfoTest, givenNodeOrdinalSetWhenChosenEngineTypeQueriedThenSetValueIsRet
|
||||
auto engineType = getChosenEngineType(hwInfo);
|
||||
EXPECT_EQ(EngineType::ENGINE_VECS, engineType);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenNoAllocationProvidedThenUseArgumentsasInput) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
auto gmmHelper = ee.getGmmHelper();
|
||||
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_EQ(sizeof(RENDER_SURFACE_STATE), helper.getRenderSurfaceStateSize());
|
||||
|
||||
size_t size = 0x1000;
|
||||
SURFACE_STATE_BUFFER_LENGTH length;
|
||||
length.Length = static_cast<uint32_t>(size - 1);
|
||||
uint64_t addr = 0x2000;
|
||||
size_t offset = 0x1000;
|
||||
uint32_t pitch = 0x40;
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, offset, pitch, nullptr, 0, type, true);
|
||||
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
EXPECT_EQ(pitch, state->getSurfacePitch());
|
||||
addr += offset;
|
||||
EXPECT_EQ(addr, state->getSurfaceBaseAddress());
|
||||
EXPECT_EQ(type, state->getSurfaceType());
|
||||
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), state->getMemoryObjectControlState());
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
size = 0x1003;
|
||||
length.Length = static_cast<uint32_t>(alignUp(size, 4) - 1);
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, nullptr, 0, type, true);
|
||||
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), state->getMemoryObjectControlState());
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
size = 0x1000;
|
||||
addr = 0x2001;
|
||||
length.Length = static_cast<uint32_t>(size - 1);
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, nullptr, 0, type, true);
|
||||
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), state->getMemoryObjectControlState());
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
EXPECT_EQ(addr, state->getSurfaceBaseAddress());
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
size = 0x1005;
|
||||
length.Length = static_cast<uint32_t>(alignUp(size, 4) - 1);
|
||||
cl_mem_flags flags = CL_MEM_READ_ONLY;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, nullptr, flags, type, true);
|
||||
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), state->getMemoryObjectControlState());
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
EXPECT_EQ(addr, state->getSurfaceBaseAddress());
|
||||
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenAllocationProvidedThenUseAllocationAsInput) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
SURFACE_STATE_BUFFER_LENGTH length;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
void *cpuAddr = reinterpret_cast<void *>(0x4000);
|
||||
uint64_t gpuAddr = 0x4000u;
|
||||
size_t allocSize = size;
|
||||
length.Length = static_cast<uint32_t>(allocSize - 1);
|
||||
GraphicsAllocation allocation(cpuAddr, gpuAddr, 0u, allocSize, 0, false);
|
||||
allocation.gmm = new Gmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false);
|
||||
ASSERT_NE(nullptr, allocation.gmm);
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, &allocation, 0, type, true);
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
EXPECT_EQ(pitch, state->getSurfacePitch() - 1u);
|
||||
EXPECT_EQ(gpuAddr, state->getSurfaceBaseAddress());
|
||||
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
|
||||
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());
|
||||
|
||||
delete allocation.gmm;
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmAndAllocationCompressionEnabledAnNonAuxDisabledThenSetCoherencyToGpuAndAuxModeToCompression) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
void *cpuAddr = reinterpret_cast<void *>(0x4000);
|
||||
uint64_t gpuAddr = 0x4000u;
|
||||
size_t allocSize = size;
|
||||
GraphicsAllocation allocation(cpuAddr, gpuAddr, 0u, allocSize, 0, false);
|
||||
allocation.gmm = new Gmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false);
|
||||
ASSERT_NE(nullptr, allocation.gmm);
|
||||
allocation.gmm->isRenderCompressed = true;
|
||||
allocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, &allocation, 0, type, false);
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());
|
||||
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, state->getAuxiliarySurfaceMode());
|
||||
|
||||
delete allocation.gmm;
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionEnabledAndAllocationDisabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
void *cpuAddr = reinterpret_cast<void *>(0x4000);
|
||||
uint64_t gpuAddr = 0x4000u;
|
||||
size_t allocSize = size;
|
||||
GraphicsAllocation allocation(cpuAddr, gpuAddr, 0u, allocSize, 0, false);
|
||||
allocation.gmm = new Gmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false);
|
||||
ASSERT_NE(nullptr, allocation.gmm);
|
||||
allocation.gmm->isRenderCompressed = true;
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, &allocation, 0, type, false);
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
|
||||
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());
|
||||
|
||||
delete allocation.gmm;
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionDisabledAndAllocationEnabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
void *cpuAddr = reinterpret_cast<void *>(0x4000);
|
||||
uint64_t gpuAddr = 0x4000u;
|
||||
size_t allocSize = size;
|
||||
GraphicsAllocation allocation(cpuAddr, gpuAddr, 0u, allocSize, 0, false);
|
||||
allocation.gmm = new Gmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false);
|
||||
ASSERT_NE(nullptr, allocation.gmm);
|
||||
allocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, &allocation, 0, type, false);
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
|
||||
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());
|
||||
|
||||
delete allocation.gmm;
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmAndAllocationCompressionEnabledAnNonAuxEnabledThenSetCoherencyToIaAndAuxModeToNone) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
|
||||
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
void *cpuAddr = reinterpret_cast<void *>(0x4000);
|
||||
uint64_t gpuAddr = 0x4000u;
|
||||
size_t allocSize = size;
|
||||
GraphicsAllocation allocation(cpuAddr, gpuAddr, 0u, allocSize, 0, false);
|
||||
allocation.gmm = new Gmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false);
|
||||
ASSERT_NE(nullptr, allocation.gmm);
|
||||
allocation.gmm->isRenderCompressed = true;
|
||||
allocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
helper.setRenderSurfaceStateForBuffer(ee, stateBuffer, size, addr, 0, pitch, &allocation, 0, type, true);
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
|
||||
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());
|
||||
|
||||
delete allocation.gmm;
|
||||
alignedFree(stateBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, DISABLED_profilingCreationOfRenderSurfaceStateVsMemcpyOfCachelineAlignedBuffer) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
|
||||
constexpr uint32_t maxLoop = 1000u;
|
||||
|
||||
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> timesCreate;
|
||||
timesCreate.reserve(maxLoop * 2);
|
||||
|
||||
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> timesMemCpy;
|
||||
timesMemCpy.reserve(maxLoop * 2);
|
||||
|
||||
std::vector<int64_t> nanoDurationCreate;
|
||||
nanoDurationCreate.reserve(maxLoop);
|
||||
|
||||
std::vector<int64_t> nanoDurationCpy;
|
||||
nanoDurationCpy.reserve(maxLoop);
|
||||
|
||||
std::vector<void *> surfaceStates;
|
||||
surfaceStates.reserve(maxLoop);
|
||||
|
||||
std::vector<void *> copyBuffers;
|
||||
copyBuffers.reserve(maxLoop);
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; ++i) {
|
||||
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, stateBuffer);
|
||||
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
|
||||
surfaceStates.push_back(stateBuffer);
|
||||
|
||||
void *copyBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
|
||||
ASSERT_NE(nullptr, copyBuffer);
|
||||
copyBuffers.push_back(copyBuffer);
|
||||
}
|
||||
|
||||
ExecutionEnvironment &ee = *pDevice->getExecutionEnvironment();
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
size_t size = 0x1000;
|
||||
uint64_t addr = 0x2000;
|
||||
uint32_t pitch = 0;
|
||||
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; ++i) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
helper.setRenderSurfaceStateForBuffer(ee, surfaceStates[i], size, addr, 0, pitch, nullptr, 0, type, true);
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
timesCreate.push_back(t1);
|
||||
timesCreate.push_back(t2);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; ++i) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
memcpy_s(copyBuffers[i], sizeof(RENDER_SURFACE_STATE), surfaceStates[i], sizeof(RENDER_SURFACE_STATE));
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
timesMemCpy.push_back(t1);
|
||||
timesMemCpy.push_back(t2);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; ++i) {
|
||||
std::chrono::duration<double> delta = timesCreate[i * 2 + 1] - timesCreate[i * 2];
|
||||
std::chrono::nanoseconds duration = std::chrono::duration_cast<std::chrono::nanoseconds>(delta);
|
||||
nanoDurationCreate.push_back(duration.count());
|
||||
|
||||
delta = timesMemCpy[i * 2 + 1] - timesMemCpy[i * 2];
|
||||
duration = std::chrono::duration_cast<std::chrono::nanoseconds>(delta);
|
||||
nanoDurationCpy.push_back(duration.count());
|
||||
}
|
||||
|
||||
sort(nanoDurationCreate.begin(), nanoDurationCreate.end());
|
||||
sort(nanoDurationCpy.begin(), nanoDurationCpy.end());
|
||||
|
||||
double averageCreate = std::accumulate(nanoDurationCreate.begin(), nanoDurationCreate.end(), 0.0) / nanoDurationCreate.size();
|
||||
double averageCpy = std::accumulate(nanoDurationCpy.begin(), nanoDurationCpy.end(), 0.0) / nanoDurationCpy.size();
|
||||
|
||||
size_t middleCreate = nanoDurationCreate.size() / 2;
|
||||
size_t middleCpy = nanoDurationCpy.size() / 2;
|
||||
|
||||
std::cout << "Creation average: " << averageCreate << " median: " << nanoDurationCreate[middleCreate];
|
||||
std::cout << " min: " << nanoDurationCreate[0] << " max: " << nanoDurationCreate[nanoDurationCreate.size() - 1] << std::endl;
|
||||
std::cout << "Copy average: " << averageCpy << " median: " << nanoDurationCpy[middleCpy];
|
||||
std::cout << " min: " << nanoDurationCpy[0] << " max: " << nanoDurationCpy[nanoDurationCpy.size() - 1] << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; i++) {
|
||||
std::cout << "#" << (i + 1) << " Create: " << nanoDurationCreate[i] << " Copy: " << nanoDurationCpy[i] << std::endl;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < maxLoop; ++i) {
|
||||
alignedFree(surfaceStates[i]);
|
||||
alignedFree(copyBuffers[i]);
|
||||
}
|
||||
}
|
||||
|
@ -9,16 +9,19 @@
|
||||
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "test.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/helpers/hw_info_helper.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
class HwHelperTest : public testing::Test {
|
||||
class HwHelperFixture : public DeviceFixture {
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
void SetUp();
|
||||
void TearDown();
|
||||
HwInfoHelper hwInfoHelper;
|
||||
};
|
||||
|
||||
using HwHelperTest = Test<HwHelperFixture>;
|
||||
|
||||
void testDefaultImplementationOfSetupHardwareCapabilities(HwHelper &hwHelper, const HardwareInfo &hwInfo);
|
||||
|
@ -417,6 +417,41 @@ TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) {
|
||||
|
||||
delete pKernel;
|
||||
pKernel = nullptr;
|
||||
|
||||
pKernelInfo = pProgram->getKernelInfo("simple_kernel_6");
|
||||
|
||||
pKernel = Kernel::create(
|
||||
pProgram,
|
||||
*pKernelInfo,
|
||||
&retVal);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
// get builtIn property
|
||||
isBuiltIn = pKernel->isBuiltIn;
|
||||
|
||||
EXPECT_FALSE(isBuiltIn);
|
||||
|
||||
delete pKernel;
|
||||
pKernel = nullptr;
|
||||
pKernelInfo = pProgram->getKernelInfo("simple_kernel_7");
|
||||
|
||||
pKernel = Kernel::create(
|
||||
pProgram,
|
||||
*pKernelInfo,
|
||||
&retVal);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
// get builtIn property
|
||||
isBuiltIn = pKernel->isBuiltIn;
|
||||
|
||||
EXPECT_FALSE(isBuiltIn);
|
||||
|
||||
delete pKernel;
|
||||
pKernel = nullptr;
|
||||
}
|
||||
|
||||
TEST(PatchInfo, Constructor) {
|
||||
|
@ -39,6 +39,7 @@ GENX::INTERFACE_DESCRIPTOR_DATA GENX::cmdInitInterfaceDescriptorData = GENX::INT
|
||||
GENX::MEDIA_STATE_FLUSH GENX::cmdInitMediaStateFlush = GENX::MEDIA_STATE_FLUSH::sInit();
|
||||
GENX::MEDIA_INTERFACE_DESCRIPTOR_LOAD GENX::cmdInitMediaInterfaceDescriptorLoad = GENX::MEDIA_INTERFACE_DESCRIPTOR_LOAD::sInit();
|
||||
GENX::MI_SEMAPHORE_WAIT GENX::cmdInitMiSemaphoreWait = GENX::MI_SEMAPHORE_WAIT::sInit();
|
||||
GENX::RENDER_SURFACE_STATE GENX::cmdRenderSurfaceState = GENX::RENDER_SURFACE_STATE::sInit();
|
||||
bool GENX::enabledYTiling = true;
|
||||
|
||||
template <>
|
||||
@ -66,11 +67,6 @@ const AubMemDump::LrcaHelper &HwHelperHw<GENX>::getCsTraits(EngineInstanceT engi
|
||||
return *AUBFamilyMapper<GENX>::csTraits[engineInstance.type];
|
||||
}
|
||||
|
||||
struct hw_helper_static_init {
|
||||
hw_helper_static_init() {
|
||||
hwHelperFactory[IGFX_UNKNOWN_CORE] = &HwHelperHw<GENX>::get();
|
||||
}
|
||||
};
|
||||
template <>
|
||||
bool HwHelperHw<GENX>::supportsYTiling() const {
|
||||
return GENX::enabledYTiling;
|
||||
@ -78,6 +74,12 @@ bool HwHelperHw<GENX>::supportsYTiling() const {
|
||||
|
||||
template class HwHelperHw<GENX>;
|
||||
|
||||
struct hw_helper_static_init {
|
||||
hw_helper_static_init() {
|
||||
hwHelperFactory[IGFX_UNKNOWN_CORE] = &HwHelperHw<GENX>::get();
|
||||
}
|
||||
};
|
||||
|
||||
hw_helper_static_init si;
|
||||
|
||||
template class GpgpuWalkerHelper<GENX>;
|
||||
|
@ -287,6 +287,64 @@ struct GENX {
|
||||
} MI_BATCH_BUFFER_END;
|
||||
|
||||
typedef struct tagRENDER_SURFACE_STATE {
|
||||
typedef enum tagSURFACE_VERTICAL_ALIGNMENT {
|
||||
SURFACE_VERTICAL_ALIGNMENT_VALIGN_4 = 0x1,
|
||||
} SURFACE_VERTICAL_ALIGNMENT;
|
||||
typedef enum tagSURFACE_HORIZONTAL_ALIGNMENT {
|
||||
SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4 = 0x1,
|
||||
} SURFACE_HORIZONTAL_ALIGNMENT;
|
||||
typedef enum tagTILE_MODE {
|
||||
TILE_MODE_LINEAR = 0x0,
|
||||
} TILE_MODE;
|
||||
typedef enum tagSURFACE_TYPE {
|
||||
SURFACE_TYPE_SURFTYPE_BUFFER = 0x4,
|
||||
SURFACE_TYPE_SURFTYPE_NULL = 0x7
|
||||
} SURFACE_TYPE;
|
||||
typedef enum tagSURFACE_FORMAT {
|
||||
SURFACE_FORMAT_RAW = 0x1ff,
|
||||
} SURFACE_FORMAT;
|
||||
typedef enum tagAUXILIARY_SURFACE_MODE {
|
||||
AUXILIARY_SURFACE_MODE_AUX_NONE = 0x0,
|
||||
AUXILIARY_SURFACE_MODE_AUX_CCS_E = 0x5,
|
||||
} AUXILIARY_SURFACE_MODE;
|
||||
typedef enum tagCOHERENCY_TYPE {
|
||||
COHERENCY_TYPE_GPU_COHERENT = 0x0,
|
||||
COHERENCY_TYPE_IA_COHERENT = 0x1,
|
||||
} COHERENCY_TYPE;
|
||||
static tagRENDER_SURFACE_STATE sInit(void) {
|
||||
RENDER_SURFACE_STATE state;
|
||||
return state;
|
||||
}
|
||||
inline void setWidth(const uint32_t value) {
|
||||
}
|
||||
inline void setHeight(const uint32_t value) {
|
||||
}
|
||||
inline void setDepth(const uint32_t value) {
|
||||
}
|
||||
inline void setSurfacePitch(const uint32_t value) {
|
||||
}
|
||||
inline void setSurfaceType(const SURFACE_TYPE value) {
|
||||
}
|
||||
inline void setSurfaceFormat(const SURFACE_FORMAT value) {
|
||||
}
|
||||
inline void setSurfaceVerticalAlignment(const SURFACE_VERTICAL_ALIGNMENT value) {
|
||||
}
|
||||
inline void setSurfaceHorizontalAlignment(const SURFACE_HORIZONTAL_ALIGNMENT value) {
|
||||
}
|
||||
inline void setTileMode(const TILE_MODE value) {
|
||||
}
|
||||
inline void setVerticalLineStride(const uint32_t value) {
|
||||
}
|
||||
inline void setVerticalLineStrideOffset(const uint32_t value) {
|
||||
}
|
||||
inline void setMemoryObjectControlState(const uint32_t value) {
|
||||
}
|
||||
inline void setSurfaceBaseAddress(const uint64_t value) {
|
||||
}
|
||||
inline void setCoherencyType(const COHERENCY_TYPE value) {
|
||||
}
|
||||
inline void setAuxiliarySurfaceMode(const AUXILIARY_SURFACE_MODE value) {
|
||||
}
|
||||
} RENDER_SURFACE_STATE;
|
||||
|
||||
typedef struct tagMEDIA_VFE_STATE {
|
||||
@ -379,6 +437,7 @@ struct GENX {
|
||||
static MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad;
|
||||
static MI_SEMAPHORE_WAIT cmdInitMiSemaphoreWait;
|
||||
static PIPE_CONTROL cmdInitPipeControl;
|
||||
static RENDER_SURFACE_STATE cmdRenderSurfaceState;
|
||||
};
|
||||
|
||||
template <>
|
||||
|
@ -7,7 +7,9 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||
#include "runtime/execution_environment/execution_environment.h"
|
||||
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "unit_tests/mocks/mock_experimental_command_buffer.h"
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
@ -20,7 +22,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass = CommandStreamReceiverHw<GfxFamily>;
|
||||
|
||||
public:
|
||||
using BaseClass::createScratchSpaceAllocation;
|
||||
using BaseClass::deviceIndex;
|
||||
using BaseClass::dshState;
|
||||
using BaseClass::getScratchPatchAddress;
|
||||
using BaseClass::hwInfo;
|
||||
@ -51,7 +53,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::requiredScratchSize;
|
||||
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::scratchAllocation;
|
||||
using BaseClass::CommandStreamReceiver::scratchSpaceController;
|
||||
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
||||
using BaseClass::CommandStreamReceiver::taskCount;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "unit_tests/tests_configuration.h"
|
||||
#include "runtime/gmm_helper/resource_info.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include "runtime/os_interface/hw_info_config.h"
|
||||
#include "External/Common/GmmLibDllName.h"
|
||||
#include "mock_gmm_client_context.h"
|
||||
#include "gmock/gmock.h"
|
||||
@ -185,9 +186,10 @@ int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
::testing::InitGoogleMock(&argc, argv);
|
||||
|
||||
std::string hwInfoConfig = "default";
|
||||
auto numDevices = numPlatformDevices;
|
||||
HardwareInfo device = DEFAULT_TEST_PLATFORM::hwInfo;
|
||||
hardwareInfoSetup[device.pPlatform->eProductFamily](const_cast<GT_SYSTEM_INFO *>(device.pSysInfo), const_cast<FeatureTable *>(device.pSkuTable), setupFeatureTable, hwInfoConfig);
|
||||
GT_SYSTEM_INFO gtSystemInfo = *device.pSysInfo;
|
||||
FeatureTable featureTable = *device.pSkuTable;
|
||||
|
||||
@ -195,7 +197,7 @@ int main(int argc, char **argv) {
|
||||
uint32_t euPerSubSlice = 0;
|
||||
uint32_t sliceCount = 0;
|
||||
uint32_t subSliceCount = 0;
|
||||
int dieRecovery = 1;
|
||||
int dieRecovery = 0;
|
||||
::productFamily = device.pPlatform->eProductFamily;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
@ -277,7 +279,7 @@ int main(int argc, char **argv) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t threadsPerEu = 7;
|
||||
uint32_t threadsPerEu = hwInfoConfigFactory[productFamily]->threadsPerEu;
|
||||
PLATFORM platform;
|
||||
auto hardwareInfo = hardwareInfoTable[productFamily];
|
||||
if (!hardwareInfo) {
|
||||
@ -288,7 +290,6 @@ int main(int argc, char **argv) {
|
||||
platform.usRevId = (uint16_t)revisionId;
|
||||
|
||||
// set Gt and FeatureTable to initial state
|
||||
std::string hwInfoConfig = "default";
|
||||
hardwareInfoSetup[productFamily](>SystemInfo, &featureTable, setupFeatureTable, hwInfoConfig);
|
||||
// and adjust dynamic values if not secified
|
||||
sliceCount = sliceCount > 0 ? sliceCount : gtSystemInfo.SliceCount;
|
||||
@ -296,7 +297,7 @@ int main(int argc, char **argv) {
|
||||
euPerSubSlice = euPerSubSlice > 0 ? euPerSubSlice : gtSystemInfo.MaxEuPerSubSlice;
|
||||
// clang-format off
|
||||
gtSystemInfo.SliceCount = sliceCount;
|
||||
gtSystemInfo.SubSliceCount = subSliceCount;
|
||||
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSliceCount;
|
||||
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
|
||||
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
|
||||
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
|
||||
|
@ -34,6 +34,7 @@ MockContext::MockContext(Device *device, bool noSpecialQueue) {
|
||||
MockContext::MockContext(
|
||||
void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
|
||||
void *data) {
|
||||
device = nullptr;
|
||||
properties = nullptr;
|
||||
numProperties = 0;
|
||||
contextCallback = funcNotify;
|
||||
|
@ -247,6 +247,7 @@ class MockKernelWithInternals {
|
||||
memset(&executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
|
||||
memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState));
|
||||
executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
kernelHeader.SurfaceStateHeapSize = sizeof(sshLocal);
|
||||
@ -259,6 +260,7 @@ class MockKernelWithInternals {
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
kernelInfo.patchInfo.mediavfestate = &mediaVfeState;
|
||||
|
||||
if (context == nullptr) {
|
||||
mockContext = new MockContext;
|
||||
@ -287,9 +289,10 @@ class MockKernelWithInternals {
|
||||
MockProgram *mockProgram;
|
||||
Context *mockContext;
|
||||
KernelInfo kernelInfo;
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
SPatchMediaVFEState mediaVfeState = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchExecutionEnvironment executionEnvironmentBlock = {};
|
||||
uint32_t kernelIsa[32];
|
||||
|
@ -48,3 +48,48 @@ __kernel void simple_kernel_4() {
|
||||
__kernel void simple_kernel_5(__global uint *dst) {
|
||||
atomic_inc(dst);
|
||||
}
|
||||
|
||||
#define SIMPLE_KERNEL_6_ARRAY_SIZE 256
|
||||
__kernel void simple_kernel_6(__global uint *dst, __global uint2 *src, uint scalar, uint maxIterations, uint maxIterations2) {
|
||||
__private uint2 array[SIMPLE_KERNEL_6_ARRAY_SIZE];
|
||||
__private uint2 sum;
|
||||
__private size_t gid = get_global_id(0);
|
||||
__private size_t lid = get_local_id(0);
|
||||
|
||||
__private uint multi = 1;
|
||||
if(lid == 1024) {
|
||||
multi = 4;
|
||||
}
|
||||
sum = (uint2)(0, 0);
|
||||
|
||||
for(int i = 0; i < maxIterations; ++i) {
|
||||
array[i] = src[i] + (uint2)(i*multi, i*multi+scalar);
|
||||
}
|
||||
|
||||
for(int i = 0; i < maxIterations2; ++i) {
|
||||
sum.x = array[i].x + sum.x;
|
||||
sum.y = array[i].y + sum.y;
|
||||
}
|
||||
|
||||
vstore2(sum, gid, dst);
|
||||
}
|
||||
|
||||
typedef long16 TYPE;
|
||||
__attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32
|
||||
__attribute__((intel_reqd_sub_group_size(8))) // force SIMD to 8
|
||||
__kernel void simple_kernel_7(__global int *resIdx, global TYPE *src, global TYPE *dst){
|
||||
size_t lid = get_local_id(0);
|
||||
size_t gid = get_global_id(0);
|
||||
|
||||
TYPE res1 = src[gid*3];
|
||||
TYPE res2 = src[gid*3+1];
|
||||
TYPE res3 = src[gid*3+2];
|
||||
|
||||
__local TYPE locMem[32];
|
||||
locMem[lid] = res1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
TYPE res = (locMem[resIdx[gid]]*res3)*res2 + res1;
|
||||
|
||||
dst[gid] = res;
|
||||
}
|
||||
|
Reference in New Issue
Block a user