mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Reorganization directory structure [3/n]
Change-Id: If3dfa3f6007f8810a6a1ae1a4f0c7da38544648d
This commit is contained in:
91
shared/source/helpers/CMakeLists.txt
Normal file
91
shared/source/helpers/CMakeLists.txt
Normal file
@@ -0,0 +1,91 @@
|
||||
#
|
||||
# Copyright (C) 2019-2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Files that make up the shared "helpers" component. Entries that contain
# ${BRANCH_DIR_SUFFIX} are resolved relative to the branch-specific directory.
set(NEO_CORE_HELPERS
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}/abort.h
  ${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
  ${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
  ${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
  ${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h
  ${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
  ${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers.h
  ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_base.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_bdw_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h
  ${CMAKE_CURRENT_SOURCE_DIR}/common_types.h
  ${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.h
  ${CMAKE_CURRENT_SOURCE_DIR}/debug_helpers.h
  ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.h
  ${CMAKE_CURRENT_SOURCE_DIR}/engine_control.h
  ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/engine_node_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum.h
  ${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
  ${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.h
  ${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.h
  ${CMAKE_CURRENT_SOURCE_DIR}/get_info.h
  ${CMAKE_CURRENT_SOURCE_DIR}/hash.h
  ${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds.h
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_base.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_bdw_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/hw_helper_extended.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
  ${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
  ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
  ${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h
  ${CMAKE_CURRENT_SOURCE_DIR}/options.h
  ${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h
  ${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
  ${CMAKE_CURRENT_SOURCE_DIR}/preamble_base.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/preamble_bdw_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
  ${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h
  ${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h
  ${CMAKE_CURRENT_SOURCE_DIR}/simd_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
  ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_bdw_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/state_compute_mode_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/stdio.h
  ${CMAKE_CURRENT_SOURCE_DIR}/string.h
  ${CMAKE_CURRENT_SOURCE_DIR}/surface_format_info.h
  ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
  ${CMAKE_CURRENT_SOURCE_DIR}/vec.h
)

# Publish the list as a global property so it can be read from other directories.
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS "${NEO_CORE_HELPERS}")

# Windows-only helper sources are published under their own global property.
if(WIN32)
  set(NEO_CORE_SRCS_HELPERS_WINDOWS
    ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h
    ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl
  )
  set_property(GLOBAL PROPERTY NEO_CORE_SRCS_HELPERS_WINDOWS "${NEO_CORE_SRCS_HELPERS_WINDOWS}")
endif()

# add_subdirectories() is a project-provided command, not a CMake built-in.
add_subdirectories()
|
||||
16
shared/source/helpers/abort.cpp
Normal file
16
shared/source/helpers/abort.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/abort.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
namespace NEO {

// Ends the program by calling the C runtime abort routine.
void abortExecution() {
    std::abort();
}

} // namespace NEO
|
||||
12
shared/source/helpers/abort.h
Normal file
12
shared/source/helpers/abort.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace NEO {

// Declared [[noreturn]]: control never comes back to the caller.
[[noreturn]] void abortExecution();

} // namespace NEO
|
||||
80
shared/source/helpers/address_patch.h
Normal file
80
shared/source/helpers/address_patch.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
namespace NEO {

// Identifies the kind of allocation referenced by either side of a patch entry.
enum PatchInfoAllocationType {
    Default = 0,
    KernelArg,
    GeneralStateHeap,
    DynamicStateHeap,
    IndirectObjectHeap,
    SurfaceStateHeap,
    InstructionHeap,
    TagAddress,
    TagValue,
    GUCStartMessage,
    ScratchSpace
};

// One patch record: a source allocation/offset/type, a target
// allocation/offset/type and the size of the address being patched.
struct PatchInfoData {
    uint64_t sourceAllocation;
    uint64_t sourceAllocationOffset;
    PatchInfoAllocationType sourceType;
    uint64_t targetAllocation;
    uint64_t targetAllocationOffset;
    PatchInfoAllocationType targetType;
    uint32_t patchAddressSize;

    // Builds a record with an explicit patch address size.
    PatchInfoData(uint64_t sourceAllocation,
                  uint64_t sourceAllocationOffset,
                  PatchInfoAllocationType sourceType,
                  uint64_t targetAllocation,
                  uint64_t targetAllocationOffset,
                  PatchInfoAllocationType targetType,
                  uint32_t patchAddressSize)
        : sourceAllocation(sourceAllocation),
          sourceAllocationOffset(sourceAllocationOffset),
          sourceType(sourceType),
          targetAllocation(targetAllocation),
          targetAllocationOffset(targetAllocationOffset),
          targetType(targetType),
          patchAddressSize(patchAddressSize) {
    }

    // Builds a record whose patch address size defaults to sizeof(void *).
    PatchInfoData(uint64_t sourceAllocation,
                  uint64_t sourceAllocationOffset,
                  PatchInfoAllocationType sourceType,
                  uint64_t targetAllocation,
                  uint64_t targetAllocationOffset,
                  PatchInfoAllocationType targetType)
        : PatchInfoData(sourceAllocation,
                        sourceAllocationOffset,
                        sourceType,
                        targetAllocation,
                        targetAllocationOffset,
                        targetType,
                        static_cast<uint32_t>(sizeof(void *))) {
    }

    // Returns true unless the target type is Default or GUCStartMessage.
    // Const-qualified so it can be queried on const records as well.
    bool requiresIndirectPatching() const {
        return (targetType != PatchInfoAllocationType::Default && targetType != PatchInfoAllocationType::GUCStartMessage);
    }
};

// Plain data record with CPU/GPU base addresses, start/end offsets and
// batch-buffer-start location/address; every field defaults to zero.
struct CommandChunk {
    uint64_t baseAddressCpu = 0;
    uint64_t baseAddressGpu = 0;
    uint64_t startOffset = 0;
    uint64_t endOffset = 0;
    uint64_t batchBufferStartLocation = 0;
    uint64_t batchBufferStartAddress = 0;
};

} // namespace NEO
|
||||
112
shared/source/helpers/aligned_memory.h
Normal file
112
shared/source/helpers/aligned_memory.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/debug_helpers.h"
|
||||
#include "memory_manager/memory_constants.h"
|
||||
#include "opencl/source/utilities/logger.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <new>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define ALIGNAS(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNAS(x) alignas(x)
|
||||
#endif
|
||||
|
||||
// Rounds `before` up using the bit mask (alignment - 1); the mask arithmetic
// only yields a multiple of `alignment` when `alignment` is a power of two.
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignUp(T before, size_t alignment) {
    const TNoRef lowBits = static_cast<TNoRef>(alignment - 1);
    return (before + lowBits) & ~lowBits;
}

// Pointer overload: aligns the numeric address and casts it back to T *.
template <typename T>
constexpr inline T *alignUp(T *ptrBefore, size_t alignment) {
    const auto address = reinterpret_cast<uintptr_t>(ptrBefore);
    return reinterpret_cast<T *>(alignUp(address, alignment));
}
|
||||
|
||||
// Rounds `before` down by clearing the bits covered by (alignment - 1); this
// only yields a multiple of `alignment` when `alignment` is a power of two.
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignDown(T before, size_t alignment) {
    const TNoRef lowBits = static_cast<TNoRef>(alignment - 1);
    return before & ~lowBits;
}

// Pointer overload: aligns the numeric address and casts it back to T *.
template <typename T>
constexpr inline T *alignDown(T *ptrBefore, size_t alignment) {
    const auto address = reinterpret_cast<uintptr_t>(ptrBefore);
    return reinterpret_cast<T *>(alignDown(address, alignment));
}
|
||||
|
||||
// Allocates at least `bytes` bytes and returns an address that is a multiple
// of `alignment` (raised to at least sizeof(void *)). A request of 0 bytes is
// treated as sizeof(void *). The pointer returned by new[] is stored in the
// slot immediately before the returned address, so the result must not be
// handed to delete/free directly. Returns nullptr when the allocation fails
// or when bytes + alignment does not fit in size_t.
inline void *alignedMalloc(size_t bytes, size_t alignment) {
    DEBUG_BREAK_IF(alignment == 0);

    if (bytes == 0) {
        bytes = sizeof(void *);
    }

    // Make sure our alignment is at least the size of a pointer
    alignment = std::max(alignment, sizeof(void *));

    // Allocate bytes + alignment so there is always room to shift the pointer
    size_t sizeToAlloc = bytes + alignment;
    if (sizeToAlloc < bytes) {
        // Unsigned wrap-around: allocating the wrapped size would hand back
        // a buffer far smaller than requested.
        return nullptr;
    }
    auto pOriginalMemory = new (std::nothrow) char[sizeToAlloc];

    // Add in the alignment
    auto pAlignedMemory = reinterpret_cast<uintptr_t>(pOriginalMemory);
    if (pAlignedMemory) {
        pAlignedMemory += alignment;
        pAlignedMemory -= pAlignedMemory % alignment;

        // Store the original pointer to facilitate deallocation
        reinterpret_cast<void **>(pAlignedMemory)[-1] = pOriginalMemory;
    }

    DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(pOriginalMemory), "size:", sizeToAlloc);
    // Return result
    return reinterpret_cast<void *>(pAlignedMemory); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
}
|
||||
|
||||
inline void alignedFree(void *ptr) {
|
||||
if (ptr) {
|
||||
auto originalPtr = reinterpret_cast<char **>(ptr)[-1];
|
||||
DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(originalPtr));
|
||||
delete[] originalPtr;
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t alignSizeWholePage(const void *ptr, size_t size) {
|
||||
uintptr_t startPageMisalignedAddressOffset = reinterpret_cast<uintptr_t>(ptr) & MemoryConstants::pageMask;
|
||||
size_t alignedSizeToPage = alignUp(startPageMisalignedAddressOffset + size, MemoryConstants::pageSize);
|
||||
return alignedSizeToPage;
|
||||
}
|
||||
|
||||
// True when `val`, converted to size_t, is a multiple of the compile-time `alignment`.
template <size_t alignment, typename T>
inline constexpr bool isAligned(T val) {
    return 0 == (static_cast<size_t>(val) % alignment);
}

// True when the address held by `ptr` is a multiple of the compile-time `alignment`.
template <size_t alignment, typename T>
inline bool isAligned(T *ptr) {
    const auto address = reinterpret_cast<uintptr_t>(ptr);
    return 0 == (address % alignment);
}

// Run-time alignment variant; uses the mask (alignment - 1), which is only
// a correct test when `alignment` is a power of two.
template <typename T1, typename T2>
inline bool isAligned(T1 ptr, T2 alignment) {
    const size_t lowBitsMask = static_cast<size_t>(alignment) - 1u;
    return (static_cast<size_t>(ptr) & lowBitsMask) == 0;
}

// True when `ptr` satisfies alignof(T).
template <typename T>
inline bool isAligned(T *ptr) {
    const auto address = reinterpret_cast<uintptr_t>(ptr);
    return (address & (alignof(T) - 1)) == 0;
}
|
||||
inline auto allocateAlignedMemory(size_t bytes, size_t alignment) {
|
||||
return std::unique_ptr<void, std::function<decltype(alignedFree)>>(alignedMalloc(bytes, alignment), alignedFree);
|
||||
}
|
||||
15
shared/source/helpers/allow_deferred_deleter.cpp
Normal file
15
shared/source/helpers/allow_deferred_deleter.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
#include "helpers/deferred_deleter_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
bool isDeferredDeleterEnabled() {
|
||||
return DebugManager.flags.EnableDeferredDeleter.get();
|
||||
}
|
||||
} // namespace NEO
|
||||
20
shared/source/helpers/array_count.h
Normal file
20
shared/source/helpers/array_count.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
// Number of elements of a built-in array, deduced from its type.
template <typename T, size_t N>
constexpr size_t arrayCount(const T (&)[N]) {
    return N;
}

// True when `idx` is a valid index into the given built-in array.
template <typename T, size_t N>
constexpr bool isInRange(size_t idx, const T (&)[N]) {
    return N > idx;
}
|
||||
24
shared/source/helpers/aux_translation.h
Normal file
24
shared/source/helpers/aux_translation.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {

// Direction of an aux translation; None is the first (zero) enumerator.
enum class AuxTranslationDirection {
    None = 0,
    AuxToNonAux = 1,
    NonAuxToAux = 2
};

// Aux translation mode selector, stored as a 32-bit signed integer.
enum class AuxTranslationMode : int32_t {
    Builtin = 0,
    Blit = 1
};

} // namespace NEO
|
||||
182
shared/source/helpers/basic_math.h
Normal file
182
shared/source/helpers/basic_math.h
Normal file
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/vec.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
|
||||
#define KB 1024uLL
|
||||
#define MB (KB * KB)
|
||||
#define GB (KB * MB)
|
||||
|
||||
namespace Math {
|
||||
|
||||
// Smallest power of two that is >= value (32-bit). The decrement/increment
// wrap around, so an input of 0 yields 0.
constexpr uint32_t nextPowerOfTwo(uint32_t value) {
    --value;
    // Smear the highest set bit into every lower position.
    for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
        value |= value >> shift;
    }
    ++value;
    return value;
}

// Smallest power of two that is >= value (64-bit). The decrement/increment
// wrap around, so an input of 0 yields 0.
constexpr uint64_t nextPowerOfTwo(uint64_t value) {
    --value;
    // Smear the highest set bit into every lower position.
    for (uint32_t shift = 1; shift <= 32; shift <<= 1) {
        value |= value >> shift;
    }
    ++value;
    return value;
}
|
||||
|
||||
// Largest power of two that is <= value (32-bit); an input of 0 yields 0.
constexpr uint32_t prevPowerOfTwo(uint32_t value) {
    // Smear the highest set bit into every lower position...
    for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
        value |= value >> shift;
    }
    // ...then drop everything below the top bit.
    return (value - (value >> 1));
}

// Largest power of two that is <= value (64-bit); an input of 0 yields 0.
constexpr uint64_t prevPowerOfTwo(uint64_t value) {
    // Smear the highest set bit into every lower position...
    for (uint32_t shift = 1; shift <= 32; shift <<= 1) {
        value |= value >> shift;
    }
    // ...then drop everything below the top bit.
    return (value - (value >> 1));
}
|
||||
|
||||
// Index of the lowest set bit of `value`, found with a 32-entry lookup table
// indexed by the top five bits of (lowest-set-bit * 0x077CB531).
// An input of 0 selects table entry 0 and therefore returns 0.
inline uint32_t getMinLsbSet(uint32_t value) {
    static const uint8_t multiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};

    // value & (two's complement of value) keeps only the lowest set bit.
    const uint32_t lowestSetBit = value & (~value + 1u);
    const uint32_t tableIndex = static_cast<uint32_t>(lowestSetBit * 0x077CB531U) >> 27;
    return multiplyDeBruijnBitPosition[tableIndex];
}
|
||||
|
||||
// Floor of log2(value) for 32-bit input; returns 32 when value is 0.
constexpr uint32_t log2(uint32_t value) {
    if (value == 0) {
        return 32;
    }

    uint32_t exponent = 0u;
    for (value >>= 1; value != 0; value >>= 1) {
        ++exponent;
    }
    return exponent;
}

// Floor of log2(value) for 64-bit input; returns 64 when value is 0.
constexpr uint32_t log2(uint64_t value) {
    if (value == 0) {
        return 64;
    }

    uint32_t exponent = 0;
    for (value >>= 1; value != 0; value >>= 1) {
        ++exponent;
    }
    return exponent;
}
|
||||
|
||||
// Gives access to the raw 32-bit pattern of a float.
union FloatConversion {
    uint32_t u;
    float f;
};

// clang-format off
static const FloatConversion PosInfinity = {0x7f800000};
static const FloatConversion NegInfinity = {0xff800000};
static const FloatConversion Nan         = {0x7fc00000};
// clang-format on

// Converts a 32-bit float to a 16-bit half-precision bit pattern.
// Finite magnitudes >= 2^16 saturate to 0x7bff, magnitudes below 2^-24
// become a signed zero, and the normal path drops the low 13 mantissa bits.
inline uint16_t float2Half(float f) {
    FloatConversion bits;
    bits.f = f;

    // Move the float sign (bit 31) to the half sign position (bit 15).
    const uint32_t signBit = (bits.u >> 16) & 0x8000;
    float magnitude = std::fabs(f);

    // NaN is the only value that compares unequal to itself.
    if (magnitude != magnitude) {
        bits.u >>= (24 - 11);
        bits.u &= 0x7fff;
        bits.u |= 0x0200; // silence the NaN
        return static_cast<uint16_t>(bits.u | signBit);
    }

    // overflow
    const float overflowThreshold = std::ldexp(1.0f, 16);
    if (magnitude >= overflowThreshold) {
        const uint32_t saturated = (magnitude == PosInfinity.f) ? 0x7c00 : 0x7bff;
        return static_cast<uint16_t>(saturated | signBit);
    }

    // underflow
    const float smallestHalfDenormal = std::ldexp(1.0f, -24);
    if (magnitude < smallestHalfDenormal) {
        return static_cast<uint16_t>(signBit); // The halfway case can return 0x0001 or 0. 0 is even.
    }

    // half denormal
    const float smallestHalfNormal = std::ldexp(1.0f, -14);
    if (magnitude < smallestHalfNormal) {
        magnitude *= std::ldexp(1.0f, 24);
        return static_cast<uint16_t>(static_cast<int>(magnitude) | signBit);
    }

    // Normal range: drop the low 13 mantissa bits, then subtract 0x38000000
    // from the raw pattern before shifting it into half-precision position.
    bits.u &= 0xFFFFE000U;
    bits.u -= 0x38000000U;

    return static_cast<uint16_t>((bits.u >> (24 - 11)) | signBit);
}
|
||||
|
||||
// Tests divisibility with the mask (divisor - 1); as the name says, the
// result is only a real divisibility test for power-of-two divisors.
constexpr bool isDivisibleByPowerOfTwoDivisor(uint32_t number, uint32_t divisor) {
    const uint32_t remainderMask = divisor - 1;
    return 0 == (number & remainderMask);
}
|
||||
|
||||
constexpr size_t computeTotalElementsCount(const Vec3<size_t> &inputVector) {
|
||||
size_t minElementCount = 1;
|
||||
auto xDim = std::max(minElementCount, inputVector.x);
|
||||
auto yDim = std::max(minElementCount, inputVector.y);
|
||||
auto zDim = std::max(minElementCount, inputVector.z);
|
||||
return xDim * yDim * zDim;
|
||||
}
|
||||
|
||||
// True when `val` has exactly one bit set; zero is not a power of two.
template <typename T>
constexpr bool isPow2(T val) {
    if (val == 0) {
        return false;
    }
    return 0 == (val & (val - 1));
}
|
||||
|
||||
// Zero-based index of the lowest set bit of `v`.
// Returns std::numeric_limits<T>::max() when `v` is 0.
template <typename T>
constexpr T ffs(T v) {
    if (v == 0) {
        return std::numeric_limits<T>::max();
    }

    T bitIndex = 0;
    while (bitIndex < sizeof(T) * 8) {
        if ((v & (1ULL << bitIndex)) != 0) {
            return bitIndex;
        }
        ++bitIndex;
    }

    // Not reachable for a non-zero value: some bit within the width is set.
    std::abort();
}
|
||||
|
||||
// Integer division rounded towards positive infinity.
// Written as quotient + (remainder != 0) so that no intermediate sum is
// formed: adding (divisor - 1) to the dividend first wraps around for
// dividends close to the maximum of size_t. `divisor` must be non-zero.
constexpr size_t divideAndRoundUp(size_t dividend, size_t divisor) {
    return (dividend / divisor) + ((dividend % divisor) != 0 ? 1u : 0u);
}
|
||||
|
||||
} // namespace Math
|
||||
40
shared/source/helpers/bit_helpers.h
Normal file
40
shared/source/helpers/bit_helpers.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
namespace NEO {

// True when the bit at index `bitPosition` of `field` is set.
constexpr bool isBitSet(uint64_t field, uint64_t bitPosition) {
    assert(bitPosition < std::numeric_limits<uint64_t>::digits); // undefined behavior
    const uint64_t probe = 1ull << bitPosition;
    return (field & probe) != 0;
}

// True when `field` and `checkedBits` share at least one set bit.
constexpr bool isAnyBitSet(uint64_t field, uint64_t checkedBits) {
    return 0 != (field & checkedBits);
}

// True when every bit of `value` is also set in `field`; `value` must be non-zero.
constexpr bool isValueSet(uint64_t field, uint64_t value) {
    assert(value != 0);
    return value == (field & value);
}

// True when `field` has no bits set outside `acceptedBits`.
constexpr bool isFieldValid(uint64_t field, uint64_t acceptedBits) {
    const uint64_t rejectedBits = ~acceptedBits;
    return 0 == (field & rejectedBits);
}

// Returns `field` with `bitsToModify` set (newValue == true) or cleared (newValue == false).
constexpr uint64_t setBits(uint64_t field, bool newValue, uint64_t bitsToModify) {
    return newValue ? (field | bitsToModify) : (field & (~bitsToModify));
}

} // namespace NEO
|
||||
125
shared/source/helpers/blit_commands_helper.cpp
Normal file
125
shared/source/helpers/blit_commands_helper.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/blit_commands_helper.h"
|
||||
|
||||
#include "helpers/timestamp_packet.h"
|
||||
#include "memory_manager/surface.h"
|
||||
|
||||
namespace NEO {
|
||||
// Builds the blit description for a transfer between a host pointer and a
// memory object. When no host allocation is passed in, one is created through
// the command stream receiver from `hostPtr`/`copySize` and its GPU address is
// used instead of `hostAllocGpuVa`. For HostPtrToBuffer the host side is the
// source; for every other direction the host side is the destination.
BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
                                                                     CommandStreamReceiver &commandStreamReceiver,
                                                                     GraphicsAllocation *memObjAllocation,
                                                                     GraphicsAllocation *preallocatedHostAllocation,
                                                                     void *hostPtr, uint64_t memObjGpuVa,
                                                                     uint64_t hostAllocGpuVa, size_t hostPtrOffset,
                                                                     size_t copyOffset, uint64_t copySize) {

    GraphicsAllocation *hostAllocation = preallocatedHostAllocation;

    if (hostAllocation != nullptr) {
        // A caller-supplied allocation must come with a valid GPU address.
        UNRECOVERABLE_IF(hostAllocGpuVa == 0);
    } else {
        HostPtrSurface hostPtrSurface(hostPtr, static_cast<size_t>(copySize), true);
        bool success = commandStreamReceiver.createAllocationForHostSurface(hostPtrSurface, false);
        UNRECOVERABLE_IF(!success);
        hostAllocation = hostPtrSurface.getAllocation();
        hostAllocGpuVa = hostAllocation->getGpuAddress();
    }

    const bool hostIsSource = (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection);

    if (hostIsSource) {
        return {
            nullptr,                       // outputTimestampPacket
            blitDirection,                 // blitDirection
            {},                            // csrDependencies
            AuxTranslationDirection::None, // auxTranslationDirection
            memObjAllocation,              // dstAllocation
            hostAllocation,                // srcAllocation
            memObjGpuVa,                   // dstGpuAddress
            hostAllocGpuVa,                // srcGpuAddress
            copySize,                      // copySize
            copyOffset,                    // dstOffset
            hostPtrOffset};                // srcOffset
    }

    return {
        nullptr,                       // outputTimestampPacket
        blitDirection,                 // blitDirection
        {},                            // csrDependencies
        AuxTranslationDirection::None, // auxTranslationDirection
        hostAllocation,                // dstAllocation
        memObjAllocation,              // srcAllocation
        hostAllocGpuVa,                // dstGpuAddress
        memObjGpuVa,                   // srcGpuAddress
        copySize,                      // copySize
        hostPtrOffset,                 // dstOffset
        copyOffset};                   // srcOffset
}
|
||||
|
||||
// Builds the blit description for a buffer-to-buffer copy. GPU addresses are
// read from the two allocations, so both pointers are dereferenced.
BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
                                                                size_t dstOffset, size_t srcOffset, uint64_t copySize) {
    const auto direction = BlitterConstants::BlitDirection::BufferToBuffer;

    return {
        nullptr,                        // outputTimestampPacket
        direction,                      // blitDirection
        {},                             // csrDependencies
        AuxTranslationDirection::None,  // auxTranslationDirection
        dstAllocation,                  // dstAllocation
        srcAllocation,                  // srcAllocation
        dstAllocation->getGpuAddress(), // dstGpuAddress
        srcAllocation->getGpuAddress(), // srcGpuAddress
        copySize,                       // copySize
        dstOffset,                      // dstOffset
        srcOffset};                     // srcOffset
}
|
||||
|
||||
// Builds the blit description for an aux translation: source and destination
// are the same allocation, both offsets are zero and the copy size is the
// allocation's underlying buffer size.
BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
                                                                    GraphicsAllocation *allocation) {
    const auto wholeBufferSize = allocation->getUnderlyingBufferSize();
    const auto direction = BlitterConstants::BlitDirection::BufferToBuffer;

    return {
        nullptr,                     // outputTimestampPacket
        direction,                   // blitDirection
        {},                          // csrDependencies
        auxTranslationDirection,     // auxTranslationDirection
        allocation,                  // dstAllocation
        allocation,                  // srcAllocation
        allocation->getGpuAddress(), // dstGpuAddress
        allocation->getGpuAddress(), // srcGpuAddress
        wholeBufferSize,             // copySize
        0,                           // dstOffset
        0                            // srcOffset
    };
}
|
||||
|
||||
void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies,
|
||||
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
|
||||
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr) {
|
||||
auto numObjects = blitPropertiesContainer.size() / 2;
|
||||
|
||||
for (size_t i = 0; i < numObjects; i++) {
|
||||
blitPropertiesContainer[i].outputTimestampPacket = timestampPacketDependencies.auxToNonAuxNodes.peekNodes()[i];
|
||||
blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i];
|
||||
}
|
||||
|
||||
gpguCsr.requestStallingPipeControlOnNextFlush();
|
||||
auto nodesAllocator = gpguCsr.getTimestampPacketAllocator();
|
||||
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
|
||||
|
||||
// wait for barrier and events before AuxToNonAux
|
||||
blitPropertiesContainer[0].csrDependencies.push_back(×tampPacketDependencies.barrierNodes);
|
||||
|
||||
for (auto dep : depsFromEvents) {
|
||||
blitPropertiesContainer[0].csrDependencies.push_back(dep);
|
||||
}
|
||||
|
||||
// wait for NDR before NonAuxToAux
|
||||
blitPropertiesContainer[numObjects].csrDependencies.push_back(&kernelTimestamps);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
73
shared/source/helpers/blit_commands_helper.h
Normal file
73
shared/source/helpers/blit_commands_helper.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "command_stream/csr_deps.h"
|
||||
#include "helpers/aux_translation.h"
|
||||
#include "memory_manager/memory_constants.h"
|
||||
#include "utilities/stackvec.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
class CommandStreamReceiver;
|
||||
class GraphicsAllocation;
|
||||
class LinearStream;
|
||||
struct TimestampPacketStorage;
|
||||
struct RootDeviceEnvironment;
|
||||
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
|
||||
struct BlitProperties;
|
||||
struct HardwareInfo;
|
||||
struct TimestampPacketDependencies;
|
||||
using BlitPropertiesContainer = StackVec<BlitProperties, 16>;
|
||||
|
||||
struct BlitProperties {
|
||||
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
|
||||
CommandStreamReceiver &commandStreamReceiver,
|
||||
GraphicsAllocation *memObjAllocation,
|
||||
GraphicsAllocation *preallocatedHostAllocation,
|
||||
void *hostPtr, uint64_t memObjGpuVa,
|
||||
uint64_t hostAllocGpuVa, size_t hostPtrOffset,
|
||||
size_t copyOffset, uint64_t copySize);
|
||||
|
||||
static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
|
||||
size_t dstOffset, size_t srcOffset, uint64_t copySize);
|
||||
|
||||
static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
|
||||
GraphicsAllocation *allocation);
|
||||
|
||||
static void setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies,
|
||||
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
|
||||
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType);
|
||||
|
||||
TagNode<TimestampPacketStorage> *outputTimestampPacket = nullptr;
|
||||
BlitterConstants::BlitDirection blitDirection;
|
||||
CsrDependencies csrDependencies;
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
|
||||
GraphicsAllocation *dstAllocation = nullptr;
|
||||
GraphicsAllocation *srcAllocation = nullptr;
|
||||
uint64_t dstGpuAddress = 0;
|
||||
uint64_t srcGpuAddress = 0;
|
||||
uint64_t copySize = 0;
|
||||
size_t dstOffset = 0;
|
||||
size_t srcOffset = 0;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct BlitCommandsHelper {
|
||||
static size_t estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket);
|
||||
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo);
|
||||
static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
};
|
||||
} // namespace NEO
|
||||
91
shared/source/helpers/blit_commands_helper_base.inl
Normal file
91
shared/source/helpers/blit_commands_helper_base.inl
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/blit_commands_helper.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/timestamp_packet.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) {
|
||||
size_t numberOfBlits = 0;
|
||||
uint64_t sizeToBlit = copySize;
|
||||
uint64_t width = 1;
|
||||
uint64_t height = 1;
|
||||
|
||||
while (sizeToBlit != 0) {
|
||||
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
|
||||
// 2D: maxBlitWidth x (1 .. maxBlitHeight)
|
||||
width = BlitterConstants::maxBlitWidth;
|
||||
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
|
||||
} else {
|
||||
// 1D: (1 .. maxBlitWidth) x 1
|
||||
width = sizeToBlit;
|
||||
height = 1;
|
||||
}
|
||||
sizeToBlit -= (width * height);
|
||||
numberOfBlits++;
|
||||
}
|
||||
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
|
||||
(sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
|
||||
(sizeof(typename GfxFamily::MI_FLUSH_DW) * static_cast<size_t>(updateTimestampPacket));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo) {
|
||||
size_t size = 0;
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies,
|
||||
blitProperties.outputTimestampPacket != nullptr);
|
||||
}
|
||||
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo);
|
||||
size += sizeof(typename GfxFamily::MI_FLUSH_DW) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
|
||||
|
||||
return alignUp(size, MemoryConstants::cacheLineSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
uint64_t sizeToBlit = blitProperties.copySize;
|
||||
uint64_t width = 1;
|
||||
uint64_t height = 1;
|
||||
uint64_t offset = 0;
|
||||
|
||||
while (sizeToBlit != 0) {
|
||||
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
|
||||
// dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
|
||||
width = BlitterConstants::maxBlitWidth;
|
||||
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
|
||||
} else {
|
||||
// dispatch 1D blt: (1 .. maxBlitWidth) x 1
|
||||
width = sizeToBlit;
|
||||
height = 1;
|
||||
}
|
||||
|
||||
auto bltCmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
|
||||
*bltCmd = GfxFamily::cmdInitXyCopyBlt;
|
||||
|
||||
bltCmd->setTransferWidth(static_cast<uint32_t>(width));
|
||||
bltCmd->setTransferHeight(static_cast<uint32_t>(height));
|
||||
|
||||
bltCmd->setDestinationPitch(static_cast<uint32_t>(width));
|
||||
bltCmd->setSourcePitch(static_cast<uint32_t>(width));
|
||||
|
||||
bltCmd->setDestinationBaseAddress(blitProperties.dstGpuAddress + blitProperties.dstOffset + offset);
|
||||
bltCmd->setSourceBaseAddress(blitProperties.srcGpuAddress + blitProperties.srcOffset + offset);
|
||||
|
||||
appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment);
|
||||
|
||||
auto blitSize = width * height;
|
||||
sizeToBlit -= blitSize;
|
||||
offset += blitSize;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
15
shared/source/helpers/blit_commands_helper_bdw_plus.inl
Normal file
15
shared/source/helpers/blit_commands_helper_bdw_plus.inl
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/blit_commands_helper_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BlitCommandsHelper<GfxFamily>::appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
|
||||
} // namespace NEO
|
||||
24
shared/source/helpers/cache_policy.cpp
Normal file
24
shared/source/helpers/cache_policy.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/cache_policy.h"
|
||||
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool isL3Capable(void *ptr, size_t size) {
|
||||
return isAligned<MemoryConstants::cacheLineSize>(ptr) &&
|
||||
isAligned<MemoryConstants::cacheLineSize>(size);
|
||||
}
|
||||
|
||||
bool isL3Capable(const NEO::GraphicsAllocation &graphicsAllocation) {
|
||||
return isL3Capable(graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
19
shared/source/helpers/cache_policy.h
Normal file
19
shared/source/helpers/cache_policy.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "gmm_helper/gmm_lib.h"
|
||||
|
||||
namespace CacheSettings {
|
||||
constexpr uint32_t unknownMocs = GMM_RESOURCE_USAGE_UNKNOWN;
|
||||
} // namespace CacheSettings
|
||||
|
||||
namespace NEO {

class GraphicsAllocation;

// Overload taking a raw pointer and a size in bytes.
bool isL3Capable(void *ptr, size_t size);

// Overload taking a graphics allocation.
bool isL3Capable(const GraphicsAllocation &graphicsAllocation);

} // namespace NEO
|
||||
16
shared/source/helpers/common_types.h
Normal file
16
shared/source/helpers/common_types.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
namespace NEO {

struct EngineControl;

// Sequence of EngineControl entries.
using EngineControlContainer = std::vector<EngineControl>;

// Fixed-width 32-bit mask.
using DeviceBitfield = std::bitset<32>;

} // namespace NEO
|
||||
14
shared/source/helpers/completion_stamp.cpp
Normal file
14
shared/source/helpers/completion_stamp.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/completion_stamp.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const uint32_t CompletionStamp::levelNotReady = 0xFFFFFFF0;
|
||||
|
||||
} // namespace NEO
|
||||
22
shared/source/helpers/completion_stamp.h
Normal file
22
shared/source/helpers/completion_stamp.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {

using FlushStamp = uint64_t;

// Plain aggregate bundling a task count, a task level and a flush stamp.
struct CompletionStamp {
    uint32_t taskCount;
    uint32_t taskLevel;
    FlushStamp flushStamp;

    // Defined in completion_stamp.cpp
    static const uint32_t levelNotReady;
};

} // namespace NEO
|
||||
26
shared/source/helpers/debug_helpers.cpp
Normal file
26
shared/source/helpers/debug_helpers.cpp
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/debug_helpers.h"
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <cstdio>
|
||||
|
||||
namespace NEO {
|
||||
void debugBreak(int line, const char *file) {
|
||||
if (DebugManager.flags.EnableDebugBreak.get()) {
|
||||
printf("Assert was called at %d line in file:\n%s\n", line, file);
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
void abortUnrecoverable(int line, const char *file) {
|
||||
printf("Abort was called at %d line in file:\n%s\n", line, file);
|
||||
abortExecution();
|
||||
}
|
||||
} // namespace NEO
|
||||
36
shared/source/helpers/debug_helpers.h
Normal file
36
shared/source/helpers/debug_helpers.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/abort.h"
|
||||
|
||||
// Calls NEO::abortUnrecoverable with the current line and file when `expression`
// is true. Not conditional on the build type.
#define UNRECOVERABLE_IF(expression)                 \
    if (expression) {                                \
        NEO::abortUnrecoverable(__LINE__, __FILE__); \
    }
|
||||
|
||||
// Marks a code path that must never execute; any arguments are discarded and the
// process is terminated with std::abort().
#define UNREACHABLE(...) std::abort()
|
||||
|
||||
// DEBUG_BREAK_IF(expression): when _DEBUG is defined, calls NEO::debugBreak with the
// current line and file if `expression` is true. Otherwise it expands to a no-op
// that does not evaluate `expression`. A definition made before this point wins.
#ifndef DEBUG_BREAK_IF
#ifdef _DEBUG
#define DEBUG_BREAK_IF(expression)           \
    if (expression) {                        \
        NEO::debugBreak(__LINE__, __FILE__); \
    }
#else
#define DEBUG_BREAK_IF(expression) (void)0
#endif // _DEBUG
#endif // !DEBUG_BREAK_IF
|
||||
|
||||
// Silences "unused variable" warnings by casting the argument to void.
#define UNUSED_VARIABLE(x) ((void)(x))
|
||||
|
||||
namespace NEO {

// Target of DEBUG_BREAK_IF; receives the source line and file of the failed check.
void debugBreak(int line, const char *file);

// Target of UNRECOVERABLE_IF; declared as never returning to the caller.
[[noreturn]] void abortUnrecoverable(int line, const char *file);

} // namespace NEO
|
||||
10
shared/source/helpers/deferred_deleter_helper.h
Normal file
10
shared/source/helpers/deferred_deleter_helper.h
Normal file
@@ -0,0 +1,10 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once

namespace NEO {

// Reports whether the deferred deleter is enabled; the definition lives outside this header.
bool isDeferredDeleterEnabled();

} // namespace NEO
|
||||
25
shared/source/helpers/dirty_state_helpers.cpp
Normal file
25
shared/source/helpers/dirty_state_helpers.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/dirty_state_helpers.h"
|
||||
|
||||
#include "indirect_heap/indirect_heap.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
bool HeapDirtyState::updateAndCheck(const IndirectHeap *heap) {
|
||||
if (!heap->getGraphicsAllocation()) {
|
||||
sizeInPages = 0llu;
|
||||
return true;
|
||||
}
|
||||
bool dirty = gpuBaseAddress != heap->getHeapGpuBase() || sizeInPages != heap->getHeapSizeInPages();
|
||||
if (dirty) {
|
||||
gpuBaseAddress = heap->getHeapGpuBase();
|
||||
sizeInPages = heap->getHeapSizeInPages();
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
23
shared/source/helpers/dirty_state_helpers.h
Normal file
23
shared/source/helpers/dirty_state_helpers.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace NEO {

class IndirectHeap;

// Remembers the last observed GPU base address and size (in pages) of a heap.
class HeapDirtyState {
  public:
    // Defined in dirty_state_helpers.cpp.
    bool updateAndCheck(const IndirectHeap *heap);

  protected:
    uint64_t gpuBaseAddress = 0llu; // last recorded base address
    size_t sizeInPages = 0u;        // last recorded size in pages
};

} // namespace NEO
|
||||
22
shared/source/helpers/engine_control.h
Normal file
22
shared/source/helpers/engine_control.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace NEO {

class CommandStreamReceiver;
class OsContext;

// Non-owning pair of a command stream receiver and an OS context.
struct EngineControl {
    EngineControl() = default;

    EngineControl(CommandStreamReceiver *commandStreamReceiver, OsContext *osContext)
        : commandStreamReceiver(commandStreamReceiver),
          osContext(osContext) {}

    CommandStreamReceiver *commandStreamReceiver = nullptr;
    OsContext *osContext = nullptr;
};

} // namespace NEO
|
||||
24
shared/source/helpers/engine_node_helper.cpp
Normal file
24
shared/source/helpers/engine_node_helper.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/engine_node_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
namespace EngineHelpers {
|
||||
bool isCcs(aub_stream::EngineType engineType) {
|
||||
return engineType == aub_stream::ENGINE_CCS;
|
||||
}
|
||||
|
||||
bool isBcs(aub_stream::EngineType engineType) {
|
||||
return engineType == aub_stream::ENGINE_BCS;
|
||||
}
|
||||
|
||||
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo) {
|
||||
return aub_stream::EngineType::ENGINE_BCS;
|
||||
}
|
||||
} // namespace EngineHelpers
|
||||
} // namespace NEO
|
||||
20
shared/source/helpers/engine_node_helper.h
Normal file
20
shared/source/helpers/engine_node_helper.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
namespace NEO {
|
||||
struct HardwareInfo;
|
||||
|
||||
namespace EngineHelpers {
|
||||
bool isCcs(aub_stream::EngineType engineType);
|
||||
bool isBcs(aub_stream::EngineType engineType);
|
||||
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo);
|
||||
}; // namespace EngineHelpers
|
||||
} // namespace NEO
|
||||
21
shared/source/helpers/extendable_enum.h
Normal file
21
shared/source/helpers/extendable_enum.h
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Thin wrapper around a uint32_t that converts implicitly from and to the raw value.
struct ExtendableEnum {
    // Wraps the given raw value.
    constexpr ExtendableEnum(uint32_t val) : value(val) {}

    // Implicit conversion back to the raw value.
    constexpr operator uint32_t() const {
        return value;
    }

  protected:
    uint32_t value;
};
|
||||
98
shared/source/helpers/file_io.cpp
Normal file
98
shared/source/helpers/file_io.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "file_io.h"
|
||||
|
||||
#include "helpers/debug_helpers.h"
|
||||
#include "helpers/stdio.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <new>
|
||||
|
||||
std::unique_ptr<char[]> loadDataFromFile(
|
||||
const char *filename,
|
||||
size_t &retSize) {
|
||||
FILE *fp = nullptr;
|
||||
size_t nsize = 0;
|
||||
std::unique_ptr<char[]> ret;
|
||||
|
||||
DEBUG_BREAK_IF(nullptr == filename);
|
||||
// Open the file
|
||||
fopen_s(&fp, filename, "rb");
|
||||
if (fp) {
|
||||
// Allocate a buffer for the file contents
|
||||
fseek(fp, 0, SEEK_END);
|
||||
nsize = (size_t)ftell(fp);
|
||||
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
|
||||
ret.reset(new (std::nothrow) char[nsize + 1]);
|
||||
|
||||
if (ret) {
|
||||
// we initialize to all zeroes before reading in data
|
||||
memset(ret.get(), 0x00, nsize + 1);
|
||||
auto read = fread(ret.get(), sizeof(unsigned char), nsize, fp);
|
||||
DEBUG_BREAK_IF(read != nsize);
|
||||
UNUSED_VARIABLE(read);
|
||||
} else {
|
||||
nsize = 0;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
retSize = nsize;
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t writeDataToFile(
|
||||
const char *filename,
|
||||
const void *pData,
|
||||
size_t dataSize) {
|
||||
FILE *fp = nullptr;
|
||||
size_t nsize = 0;
|
||||
|
||||
DEBUG_BREAK_IF(nullptr == pData);
|
||||
DEBUG_BREAK_IF(nullptr == filename);
|
||||
|
||||
fopen_s(&fp, filename, "wb");
|
||||
if (fp) {
|
||||
nsize = fwrite(pData, sizeof(unsigned char), dataSize, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
return nsize;
|
||||
}
|
||||
|
||||
// Returns true when the named file can be opened for binary reading.
bool fileExists(const std::string &fileName) {
    DEBUG_BREAK_IF(fileName.empty());
    DEBUG_BREAK_IF(fileName == "");

    FILE *handle = nullptr;
    fopen_s(&handle, fileName.c_str(), "rb");
    if (handle == nullptr) {
        return false;
    }

    fclose(handle);
    return true;
}
|
||||
|
||||
// Returns true when the named file can be opened for binary reading and its size,
// measured by seeking to the end, is greater than zero. A failure to determine the
// size is reported as false.
bool fileExistsHasSize(const std::string &fileName) {
    FILE *pFile = nullptr;
    long fileSize = 0;

    DEBUG_BREAK_IF(fileName.empty());

    fopen_s(&pFile, fileName.c_str(), "rb");
    if (pFile == nullptr) {
        return false;
    }

    // ftell reports failure as -1L; keep the signed value so an error is not
    // mistaken for a huge positive size.
    if (fseek(pFile, 0, SEEK_END) == 0) {
        fileSize = ftell(pFile);
    }
    fclose(pFile);

    return fileSize > 0;
}
|
||||
24
shared/source/helpers/file_io.h
Normal file
24
shared/source/helpers/file_io.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
// Loads an entire file; retSize receives its size in bytes.
std::unique_ptr<char[]> loadDataFromFile(const char *filename, size_t &retSize);

// Writes dataSize bytes from pData to a file; returns the number of bytes written.
size_t writeDataToFile(const char *filename, const void *pData, size_t dataSize);

// True when the file can be opened for reading.
bool fileExists(const std::string &fileName);

// True when the file can be opened for reading and is not empty.
bool fileExistsHasSize(const std::string &fileName);
|
||||
70
shared/source/helpers/flat_batch_buffer_helper.cpp
Normal file
70
shared/source/helpers/flat_batch_buffer_helper.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/flat_batch_buffer_helper.h"
|
||||
|
||||
#include "execution_environment/execution_environment.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool FlatBatchBufferHelper::setPatchInfoData(const PatchInfoData &data) {
|
||||
patchInfoCollection.push_back(data);
|
||||
return true;
|
||||
}
|
||||
bool FlatBatchBufferHelper::removePatchInfoData(uint64_t targetLocation) {
|
||||
for (auto it = patchInfoCollection.begin(); it != patchInfoCollection.end(); ++it) {
|
||||
if (it->targetAllocation + it->targetAllocationOffset == targetLocation) {
|
||||
patchInfoCollection.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FlatBatchBufferHelper::registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset) {
|
||||
|
||||
CommandChunk commandChunk;
|
||||
commandChunk.baseAddressGpu = baseGpu;
|
||||
commandChunk.baseAddressCpu = baseCpu;
|
||||
commandChunk.startOffset = startOffset;
|
||||
commandChunk.endOffset = endOffset;
|
||||
return registerCommandChunk(commandChunk);
|
||||
}
|
||||
|
||||
bool FlatBatchBufferHelper::registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize) {
|
||||
CommandChunk commandChunk;
|
||||
commandChunk.baseAddressGpu = batchBuffer.stream->getGraphicsAllocation()->getGpuAddress();
|
||||
commandChunk.baseAddressCpu = reinterpret_cast<uint64_t>(batchBuffer.stream->getCpuBase());
|
||||
commandChunk.startOffset = batchBuffer.startOffset;
|
||||
commandChunk.endOffset = batchBuffer.chainedBatchBufferStartOffset + batchBufferStartCommandSize;
|
||||
return registerCommandChunk(commandChunk);
|
||||
}
|
||||
|
||||
bool FlatBatchBufferHelper::registerCommandChunk(CommandChunk &commandChunk) {
|
||||
commandChunkList.push_back(commandChunk);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FlatBatchBufferHelper::registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress) {
|
||||
batchBufferStartAddressSequence.insert(std::pair<uint64_t, uint64_t>(commandAddress, startAddress));
|
||||
return true;
|
||||
}
|
||||
|
||||
void FlatBatchBufferHelper::fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress) {
|
||||
for (auto &patchInfoData : data) {
|
||||
if (patchInfoData.sourceType == PatchInfoAllocationType::KernelArg) {
|
||||
patchInfoData.targetAllocation = gpuAddress;
|
||||
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MemoryManager *FlatBatchBufferHelper::getMemoryManager() const {
|
||||
return executionEnvironemnt.memoryManager.get();
|
||||
}
|
||||
}; // namespace NEO
|
||||
51
shared/source/helpers/flat_batch_buffer_helper.h
Normal file
51
shared/source/helpers/flat_batch_buffer_helper.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "command_stream/submissions_aggregator.h"
|
||||
#include "helpers/address_patch.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
enum class DispatchMode;
|
||||
class MemoryManager;
|
||||
class ExecutionEnvironment;
|
||||
|
||||
class FlatBatchBufferHelper {
|
||||
public:
|
||||
FlatBatchBufferHelper(ExecutionEnvironment &executionEnvironemnt) : executionEnvironemnt(executionEnvironemnt) {}
|
||||
virtual ~FlatBatchBufferHelper(){};
|
||||
MOCKABLE_VIRTUAL bool setPatchInfoData(const PatchInfoData &data);
|
||||
MOCKABLE_VIRTUAL bool removePatchInfoData(uint64_t targetLocation);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(CommandChunk &commandChunk);
|
||||
MOCKABLE_VIRTUAL bool registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize);
|
||||
MOCKABLE_VIRTUAL bool registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress);
|
||||
virtual GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0;
|
||||
virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) = 0;
|
||||
virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) = 0;
|
||||
virtual void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) = 0;
|
||||
static void fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress);
|
||||
|
||||
std::vector<CommandChunk> &getCommandChunkList() { return commandChunkList; }
|
||||
std::vector<PatchInfoData> &getPatchInfoCollection() { return patchInfoCollection; }
|
||||
std::map<uint64_t, uint64_t> &getBatchBufferStartAddressSequence() { return batchBufferStartAddressSequence; }
|
||||
|
||||
protected:
|
||||
MemoryManager *getMemoryManager() const;
|
||||
ExecutionEnvironment &executionEnvironemnt;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoCollection;
|
||||
std::vector<CommandChunk> commandChunkList;
|
||||
std::map<uint64_t, uint64_t> batchBufferStartAddressSequence;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
24
shared/source/helpers/flat_batch_buffer_helper_hw.h
Normal file
24
shared/source/helpers/flat_batch_buffer_helper_hw.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helpers/flat_batch_buffer_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class FlatBatchBufferHelperHw : public FlatBatchBufferHelper {
|
||||
public:
|
||||
using FlatBatchBufferHelper::FlatBatchBufferHelper;
|
||||
GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override;
|
||||
char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) override;
|
||||
void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) override;
|
||||
void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) override;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
203
shared/source/helpers/flat_batch_buffer_helper_hw.inl
Normal file
203
shared/source/helpers/flat_batch_buffer_helper_hw.inl
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "command_stream/command_stream_receiver.h"
|
||||
#include "helpers/flat_batch_buffer_helper_hw.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/string.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
#include "memory_manager/memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
GraphicsAllocation *FlatBatchBufferHelperHw<GfxFamily>::flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer,
|
||||
DispatchMode dispatchMode) {
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
|
||||
typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
|
||||
typedef typename GfxFamily::MI_USER_INTERRUPT MI_USER_INTERRUPT;
|
||||
|
||||
GraphicsAllocation *flatBatchBuffer = nullptr;
|
||||
|
||||
size_t indirectPatchCommandsSize = 0u;
|
||||
std::vector<PatchInfoData> indirectPatchInfo;
|
||||
std::unique_ptr<char> indirectPatchCommands(getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo));
|
||||
|
||||
if (dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
if (batchBuffer.chainedBatchBuffer) {
|
||||
batchBuffer.chainedBatchBuffer->setAubWritable(false, GraphicsAllocation::defaultBank);
|
||||
auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
|
||||
auto alignedMainBatchBufferSize = alignUp(sizeMainBatchBuffer + indirectPatchCommandsSize + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
|
||||
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, alignedMainBatchBufferSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
|
||||
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
|
||||
flatBatchBuffer =
|
||||
getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
|
||||
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
|
||||
// Copy main batchbuffer
|
||||
memcpy_s(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer,
|
||||
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
|
||||
sizeMainBatchBuffer);
|
||||
// Copy indirect patch commands
|
||||
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer), indirectPatchCommandsSize,
|
||||
indirectPatchCommands.get(), indirectPatchCommandsSize);
|
||||
// Copy chained batchbuffer
|
||||
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer + indirectPatchCommandsSize),
|
||||
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(),
|
||||
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
|
||||
sizeBatchBuffer = flatBatchBufferProperties.size;
|
||||
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
|
||||
}
|
||||
} else if (dispatchMode == DispatchMode::BatchedDispatch) {
|
||||
CommandChunk firstChunk;
|
||||
for (auto &chunk : commandChunkList) {
|
||||
bool found = false;
|
||||
for (auto &batchBuffer : batchBufferStartAddressSequence) {
|
||||
if ((batchBuffer.first <= chunk.baseAddressGpu + chunk.endOffset) && (batchBuffer.first >= chunk.baseAddressGpu + chunk.startOffset)) {
|
||||
chunk.batchBufferStartLocation = batchBuffer.first;
|
||||
chunk.batchBufferStartAddress = batchBuffer.second;
|
||||
chunk.endOffset = chunk.batchBufferStartLocation - chunk.baseAddressGpu;
|
||||
}
|
||||
if (batchBuffer.second == chunk.baseAddressGpu + chunk.startOffset) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
firstChunk = chunk;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CommandChunk> orderedChunks;
|
||||
CommandChunk &nextChunk = firstChunk;
|
||||
while (true) {
|
||||
bool hasNextChunk = false;
|
||||
for (auto &chunk : commandChunkList) {
|
||||
if (nextChunk.batchBufferStartAddress == chunk.baseAddressGpu + chunk.startOffset) {
|
||||
hasNextChunk = true;
|
||||
orderedChunks.push_back(nextChunk);
|
||||
nextChunk = chunk;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasNextChunk) {
|
||||
nextChunk.endOffset -= sizeof(MI_BATCH_BUFFER_START);
|
||||
orderedChunks.push_back(nextChunk);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t flatBatchBufferSize = 0u;
|
||||
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
|
||||
patchInfoCollection.clear();
|
||||
|
||||
for (auto &chunk : orderedChunks) {
|
||||
for (auto &patch : patchInfoCopy) {
|
||||
if (patch.targetAllocation + patch.targetAllocationOffset >= chunk.baseAddressGpu + chunk.startOffset && patch.targetAllocation + patch.targetAllocationOffset <= chunk.baseAddressGpu + chunk.endOffset) {
|
||||
patch.targetAllocationOffset = patch.targetAllocationOffset - chunk.startOffset + flatBatchBufferSize + indirectPatchCommandsSize;
|
||||
patchInfoCollection.push_back(patch);
|
||||
}
|
||||
}
|
||||
flatBatchBufferSize += chunk.endOffset - chunk.startOffset;
|
||||
}
|
||||
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
|
||||
|
||||
flatBatchBufferSize += sizeof(MI_USER_INTERRUPT);
|
||||
flatBatchBufferSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
flatBatchBufferSize += indirectPatchCommandsSize;
|
||||
|
||||
flatBatchBufferSize = alignUp(flatBatchBufferSize, MemoryConstants::pageSize);
|
||||
flatBatchBufferSize += CSRequirements::csOverfetchSize;
|
||||
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, static_cast<size_t>(flatBatchBufferSize), GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
|
||||
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
|
||||
flatBatchBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
|
||||
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
|
||||
|
||||
char *ptr = static_cast<char *>(flatBatchBuffer->getUnderlyingBuffer());
|
||||
memcpy_s(ptr, indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
|
||||
ptr += indirectPatchCommandsSize;
|
||||
for (auto &chunk : orderedChunks) {
|
||||
size_t chunkSize = static_cast<size_t>(chunk.endOffset - chunk.startOffset);
|
||||
memcpy_s(ptr,
|
||||
chunkSize,
|
||||
reinterpret_cast<char *>(ptrOffset(chunk.baseAddressCpu, static_cast<size_t>(chunk.startOffset))),
|
||||
chunkSize);
|
||||
ptr += chunkSize;
|
||||
}
|
||||
|
||||
auto pCmdMui = reinterpret_cast<MI_USER_INTERRUPT *>(ptr);
|
||||
*pCmdMui = GfxFamily::cmdInitUserInterrupt;
|
||||
ptr += sizeof(MI_USER_INTERRUPT);
|
||||
|
||||
auto pCmdBBend = reinterpret_cast<MI_BATCH_BUFFER_END *>(ptr);
|
||||
*pCmdBBend = GfxFamily::cmdInitBatchBufferEnd;
|
||||
ptr += sizeof(MI_BATCH_BUFFER_END);
|
||||
|
||||
sizeBatchBuffer = static_cast<size_t>(flatBatchBufferSize);
|
||||
commandChunkList.clear();
|
||||
batchBufferStartAddressSequence.clear();
|
||||
}
|
||||
|
||||
return flatBatchBuffer;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
char *FlatBatchBufferHelperHw<GfxFamily>::getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) {
|
||||
typedef typename GfxFamily::MI_STORE_DATA_IMM MI_STORE_DATA_IMM;
|
||||
|
||||
indirectPatchCommandsSize = 0;
|
||||
for (auto &patchInfoData : patchInfoCollection) {
|
||||
if (patchInfoData.requiresIndirectPatching()) {
|
||||
indirectPatchCommandsSize += sizeof(MI_STORE_DATA_IMM);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t stiCommandOffset = 0;
|
||||
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
|
||||
std::unique_ptr<char> buffer(new char[indirectPatchCommandsSize]);
|
||||
LinearStream indirectPatchCommandStream(buffer.get(), indirectPatchCommandsSize);
|
||||
patchInfoCollection.clear();
|
||||
|
||||
for (auto &patchInfoData : patchInfoCopy) {
|
||||
if (patchInfoData.requiresIndirectPatching()) {
|
||||
auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
|
||||
*storeDataImmediate = GfxFamily::cmdInitStoreDataImm;
|
||||
storeDataImmediate->setAddress(patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset);
|
||||
storeDataImmediate->setStoreQword(patchInfoData.patchAddressSize != sizeof(uint32_t));
|
||||
storeDataImmediate->setDataDword0(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL));
|
||||
storeDataImmediate->setDataDword1(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32));
|
||||
|
||||
PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default);
|
||||
PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default);
|
||||
indirectPatchInfo.push_back(patchInfoForAddress);
|
||||
indirectPatchInfo.push_back(patchInfoForValue);
|
||||
stiCommandOffset += sizeof(MI_STORE_DATA_IMM);
|
||||
} else {
|
||||
patchInfoCollection.push_back(patchInfoData);
|
||||
}
|
||||
}
|
||||
return buffer.release();
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void FlatBatchBufferHelperHw<GfxFamily>::removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) {
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
size_t numPipeControls = (pipeControlLocationSize - MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo)) / (sizeof(PIPE_CONTROL));
|
||||
for (size_t i = 0; i < numPipeControls; i++) {
|
||||
PIPE_CONTROL *erasedPipeControl = reinterpret_cast<PIPE_CONTROL *>(pipeControlForNooping);
|
||||
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - 2 * sizeof(uint64_t));
|
||||
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - sizeof(uint64_t));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void FlatBatchBufferHelperHw<GfxFamily>::collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) {
|
||||
if (scratchAddress) {
|
||||
auto scratchOffset = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(csr.getCpuBase()) + commandOffset)[0] & 0x3FF;
|
||||
PatchInfoData patchInfoData(scratchAddress, scratchOffset, PatchInfoAllocationType::ScratchSpace, csr.getGraphicsAllocation()->getGpuAddress(), commandOffset, PatchInfoAllocationType::Default);
|
||||
patchInfoCollection.push_back(patchInfoData);
|
||||
}
|
||||
}
|
||||
|
||||
}; // namespace NEO
|
||||
65
shared/source/helpers/flush_stamp.cpp
Normal file
65
shared/source/helpers/flush_stamp.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/flush_stamp.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Creates a tracker. When allocateStamp is true a fresh tracking object is
// allocated and one internal reference is taken on it; otherwise the tracker
// starts without a tracking object.
FlushStampTracker::FlushStampTracker(bool allocateStamp) {
    if (!allocateStamp) {
        return;
    }
    flushStampSharedHandle = new FlushStampTrackingObj();
    flushStampSharedHandle->incRefInternal();
}
|
||||
|
||||
// Releases this tracker's internal reference on the tracking object, if any.
FlushStampTracker::~FlushStampTracker() {
    if (flushStampSharedHandle != nullptr) {
        flushStampSharedHandle->decRefInternal();
    }
}
|
||||
|
||||
// Returns the currently tracked flush stamp, or 0 when no stamp is available.
//
// 0 is returned both when the tracking object has not been initialized yet and
// when the tracker holds no tracking object at all (it was constructed with
// allocateStamp == false and replaceStampObject() has not attached one), so
// the call is safe in every state of the tracker.
FlushStamp FlushStampTracker::peekStamp() const {
    if (flushStampSharedHandle == nullptr) {
        return 0;
    }
    if (flushStampSharedHandle->initialized) {
        return flushStampSharedHandle->flushStamp;
    }
    return 0;
}
|
||||
|
||||
// Stores a new stamp in the tracking object and marks it initialized.
// A stamp value of 0 is ignored.
void FlushStampTracker::setStamp(FlushStamp stamp) {
    if (stamp == 0) {
        return;
    }
    flushStampSharedHandle->flushStamp = stamp;
    flushStampSharedHandle->initialized = true;
}
|
||||
|
||||
// Switches the tracker to another tracking object. A null stampObj is ignored.
// The new object is referenced before the old one is released, so passing the
// object that is already tracked does not drop its reference count to zero
// in between.
void FlushStampTracker::replaceStampObject(FlushStampTrackingObj *stampObj) {
    if (stampObj == nullptr) {
        return;
    }

    stampObj->incRefInternal();
    if (flushStampSharedHandle != nullptr) {
        flushStampSharedHandle->decRefInternal();
    }
    flushStampSharedHandle = stampObj;
}
|
||||
|
||||
// Queues a tracking object for a later updateAll(); null pointers are skipped.
void FlushStampUpdateHelper::insert(FlushStampTrackingObj *stampObj) {
    if (stampObj == nullptr) {
        return;
    }
    flushStampsToUpdate.push_back(stampObj);
}
|
||||
|
||||
void FlushStampUpdateHelper::updateAll(const FlushStamp &flushStamp) {
|
||||
for (const auto &stamp : flushStampsToUpdate) {
|
||||
stamp->flushStamp = flushStamp;
|
||||
stamp->initialized = true;
|
||||
}
|
||||
}
|
||||
|
||||
size_t FlushStampUpdateHelper::size() const {
|
||||
return flushStampsToUpdate.size();
|
||||
}
|
||||
48
shared/source/helpers/flush_stamp.h
Normal file
48
shared/source/helpers/flush_stamp.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helpers/completion_stamp.h"
|
||||
#include "utilities/reference_tracked_object.h"
|
||||
#include "utilities/stackvec.h"
|
||||
|
||||
namespace NEO {
|
||||
// Reference-counted holder of a flush stamp.
struct FlushStampTrackingObj : ReferenceTrackedObject<FlushStampTrackingObj> {
    // Last stamp written; only meaningful once `initialized` is true.
    FlushStamp flushStamp = 0;
    // Set to true when a stamp has been stored.
    std::atomic<bool> initialized{false};
};
|
||||
|
||||
class FlushStampTracker {
|
||||
public:
|
||||
FlushStampTracker() = delete;
|
||||
FlushStampTracker(bool allocateStamp);
|
||||
~FlushStampTracker();
|
||||
|
||||
FlushStamp peekStamp() const;
|
||||
void setStamp(FlushStamp stamp);
|
||||
void replaceStampObject(FlushStampTrackingObj *stampObj);
|
||||
|
||||
// Temporary. Method will be removed
|
||||
FlushStampTrackingObj *getStampReference() {
|
||||
return flushStampSharedHandle;
|
||||
}
|
||||
|
||||
protected:
|
||||
FlushStampTrackingObj *flushStampSharedHandle = nullptr;
|
||||
};
|
||||
|
||||
class FlushStampUpdateHelper {
|
||||
public:
|
||||
void insert(FlushStampTrackingObj *stampObj);
|
||||
void updateAll(const FlushStamp &flushStamp);
|
||||
size_t size() const;
|
||||
|
||||
private:
|
||||
StackVec<FlushStampTrackingObj *, 64> flushStampsToUpdate;
|
||||
};
|
||||
} // namespace NEO
|
||||
96
shared/source/helpers/get_info.h
Normal file
96
shared/source/helpers/get_info.h
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "get_info_status.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
// Need for linux compatibility with memcpy_s
|
||||
#include "helpers/string.h"
|
||||
|
||||
// Copies a queried value into a caller-supplied buffer.
//
// Returns INVALID_VALUE when there is no source data (null pointer or zero
// size) or when a destination buffer is supplied but is smaller than the
// source. Returns SUCCESS when no destination buffer is supplied (nothing is
// copied, regardless of destParamValueSize) or after the value was copied.
inline GetInfoStatus getInfo(void *destParamValue, size_t destParamValueSize,
                             const void *srcParamValue, size_t srcParamValueSize) {
    if (!srcParamValue || srcParamValueSize == 0) {
        return GetInfoStatus::INVALID_VALUE;
    }

    // No destination buffer: the caller is not asking for a copy.
    if (!destParamValue) {
        return GetInfoStatus::SUCCESS;
    }

    // Destination present but too small to hold the value.
    if (destParamValueSize < srcParamValueSize) {
        return GetInfoStatus::INVALID_VALUE;
    }

    memcpy_s(destParamValue, destParamValueSize, srcParamValue, srcParamValueSize);
    return GetInfoStatus::SUCCESS;
}
|
||||
|
||||
struct GetInfoHelper {
|
||||
GetInfoHelper(void *dst, size_t dstSize, size_t *retSize, GetInfoStatus *retVal = nullptr)
|
||||
: dst(dst), dstSize(dstSize), retSize(retSize), retVal(retVal) {
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
GetInfoStatus set(const DataType &val) {
|
||||
auto errCode = GetInfoStatus::SUCCESS;
|
||||
if (retSize != nullptr) {
|
||||
*retSize = sizeof(val);
|
||||
}
|
||||
if (dst != nullptr) {
|
||||
if (dstSize >= sizeof(val)) {
|
||||
*reinterpret_cast<DataType *>(dst) = val;
|
||||
} else {
|
||||
errCode = GetInfoStatus::INVALID_VALUE;
|
||||
}
|
||||
}
|
||||
if (retVal)
|
||||
*retVal = errCode;
|
||||
return errCode;
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
static void set(DataType *dst, DataType val) {
|
||||
if (dst) {
|
||||
*dst = val;
|
||||
}
|
||||
}
|
||||
|
||||
void *dst;
|
||||
size_t dstSize;
|
||||
size_t *retSize;
|
||||
GetInfoStatus *retVal;
|
||||
};
|
||||
|
||||
// Keeps an error code both locally and, when the caller supplied one, in an
// optional output location.
struct ErrorCodeHelper {
    // Binds the optional output location and immediately publishes defaultCode.
    ErrorCodeHelper(int *errcodeRet, int defaultCode)
        : errcodeRet(errcodeRet) {
        set(defaultCode);
    }

    // Records code locally and mirrors it to the output location if present.
    void set(int code) {
        localErrcode = code;
        if (errcodeRet) {
            *errcodeRet = code;
        }
    }

    int *errcodeRet;  // optional caller-owned output; may be nullptr
    int localErrcode; // most recently set code
};
|
||||
|
||||
// Returns param, unless it equals invalidVal, in which case defaultVal is
// returned instead.
template <typename T>
T getValidParam(T param, T defaultVal = 1, T invalidVal = 0) {
    return (param == invalidVal) ? defaultVal : param;
}
|
||||
14
shared/source/helpers/get_info_status.h
Normal file
14
shared/source/helpers/get_info_status.h
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Status codes reported by the get-info helpers.
enum class GetInfoStatus {
    INVALID_CONTEXT = -2,
    INVALID_VALUE = -1, // missing source data or destination buffer too small
    SUCCESS = 0
};
|
||||
117
shared/source/helpers/hash.h
Normal file
117
shared/source/helpers/hash.h
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "utilities/compiler_support.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
// clang-format off
// Mixes three 32-bit state words in place (subtract / xor / shift rounds).
#define HASH_JENKINS_MIX(a,b,c)    \
{                                  \
a -= b; a -= c; a ^= (c>>13);  \
b -= c; b -= a; b ^= (a<<8);   \
c -= a; c -= b; c ^= (b>>13);  \
a -= b; a -= c; a ^= (c>>12);  \
b -= c; b -= a; b ^= (a<<16);  \
c -= a; c -= b; c ^= (b>>5);   \
a -= b; a -= c; a ^= (c>>3);   \
b -= c; b -= a; b ^= (a<<10);  \
c -= a; c -= b; c ^= (b>>15);  \
}
// clang-format on

// Incremental 64-bit hash over byte buffers built on HASH_JENKINS_MIX.
// Typical use: update() one or more times, then finish(); or the one-shot
// static hash().
class Hash {
  public:
    Hash() {
        reset();
    }

    // Packs the 1..3 bytes at data into a 32-bit word: each byte is OR-ed in
    // and the word is then shifted left by 8, so the lowest byte of the result
    // is always zero. Any other size yields 0.
    uint32_t getValue(const char *data, size_t size) {
        if (size < 1 || size > 3) {
            return 0;
        }

        auto bytes = reinterpret_cast<const unsigned char *>(data);
        uint32_t packed = 0;
        for (size_t i = 0; i < size; ++i) {
            packed |= static_cast<uint32_t>(bytes[i]);
            packed <<= 8;
        }
        return packed;
    }

    // Feeds size bytes starting at buff into the hash state. A null buff is
    // ignored. Whole 32-bit words are consumed first; a remainder of 1..3 bytes
    // is folded in through getValue().
    void update(const char *buff, size_t size) {
        if (buff == nullptr) {
            return;
        }

        const char *tail = nullptr;
        const bool wordAligned = (reinterpret_cast<uintptr_t>(buff) & 0x3) == 0;

        if (wordAligned) {
            // Aligned input: read whole words directly.
            auto words = reinterpret_cast<const uint32_t *>(buff);
            for (; size >= sizeof(uint32_t); size -= sizeof(uint32_t)) {
                a ^= *words++;
                HASH_JENKINS_MIX(a, hi, lo);
            }
            tail = reinterpret_cast<const char *>(words);
        } else {
            // Unaligned input: assemble each word from bytes, lowest byte first.
            auto bytes = reinterpret_cast<const unsigned char *>(buff);
            for (; size >= sizeof(uint32_t); size -= sizeof(uint32_t), bytes += sizeof(uint32_t)) {
                const uint32_t word = static_cast<uint32_t>(bytes[0]) +
                                      (static_cast<uint32_t>(bytes[1]) << 8) +
                                      (static_cast<uint32_t>(bytes[2]) << 16) +
                                      (static_cast<uint32_t>(bytes[3]) << 24);
                a ^= word;
                HASH_JENKINS_MIX(a, hi, lo);
            }
            tail = reinterpret_cast<const char *>(bytes);
        }

        if (size > 0) {
            a ^= getValue(tail, size);
            HASH_JENKINS_MIX(a, hi, lo);
        }
    }

    // Returns the 64-bit digest: hi in the upper 32 bits, lo in the lower.
    uint64_t finish() {
        return (static_cast<uint64_t>(hi) << 32) | lo;
    }

    // Restores the initial state words.
    void reset() {
        a = 0x428a2f98;
        hi = 0x71374491;
        lo = 0xb5c0fbcf;
    }

    // One-shot convenience wrapper: hashes a single buffer.
    static uint64_t hash(const char *buff, size_t size) {
        Hash hasher;
        hasher.update(buff, size);
        return hasher.finish();
    }

  protected:
    uint32_t a, hi, lo;
};
|
||||
|
||||
// Reduces a pointer value to 32 bits. When uintptr_t is 8 bytes wide the upper
// 32 bits are XOR-ed into the lower 32 bits; when it is 4 bytes wide the
// address is returned unchanged.
template <typename T>
uint32_t hashPtrToU32(const T *src) {
    // 1 when uintptr_t is 64-bit, 0 when it is 32-bit.
    constexpr auto wideFlag = sizeof(uintptr_t) / 8;

    const auto address = reinterpret_cast<uintptr_t>(src);
    // With wideFlag == 0 the mask is 0 and the shift count is 0, so this term vanishes.
    const auto foldedUpper = (address & ~(wideFlag - 1)) >> (wideFlag * 32);

    return static_cast<uint32_t>(address ^ foldedUpper);
}
|
||||
} // namespace NEO
|
||||
35
shared/source/helpers/heap_helper.cpp
Normal file
35
shared/source/helpers/heap_helper.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/heap_helper.h"
|
||||
|
||||
#include "indirect_heap/indirect_heap.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
#include "memory_manager/internal_allocation_storage.h"
|
||||
#include "memory_manager/memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Returns an allocation to back a heap of the given type and size.
// A reusable allocation from storageForReuse is preferred; when none is
// available a new allocation with the requested alignment is created through
// the memory manager. INDIRECT_OBJECT heaps use the INTERNAL_HEAP allocation
// type, every other heap type uses LINEAR_STREAM.
GraphicsAllocation *HeapHelper::getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex) {
    auto allocationType = (IndirectHeap::Type::INDIRECT_OBJECT == heapType)
                              ? GraphicsAllocation::AllocationType::INTERNAL_HEAP
                              : GraphicsAllocation::AllocationType::LINEAR_STREAM;

    if (auto reusable = storageForReuse->obtainReusableAllocation(heapSize, allocationType)) {
        return reusable.release();
    }

    AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, {}};
    properties.alignment = alignment;
    return memManager->allocateGraphicsMemoryWithProperties(properties);
}
|
||||
// Hands heapAllocation over to the reuse storage, tagged as a reusable allocation.
void HeapHelper::storeHeapAllocation(GraphicsAllocation *heapAllocation) {
    storageForReuse->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapAllocation),
                                     AllocationUsage::REUSABLE_ALLOCATION);
}
|
||||
} // namespace NEO
|
||||
31
shared/source/helpers/heap_helper.h
Normal file
31
shared/source/helpers/heap_helper.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class MemoryManager;
|
||||
class GraphicsAllocation;
|
||||
class InternalAllocationStorage;
|
||||
|
||||
class HeapHelper {
|
||||
public:
|
||||
HeapHelper(MemoryManager *memManager, InternalAllocationStorage *storageForReuse, bool isMultiOsContextCapable) : storageForReuse(storageForReuse),
|
||||
memManager(memManager),
|
||||
isMultiOsContextCapable(isMultiOsContextCapable) {}
|
||||
GraphicsAllocation *getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex);
|
||||
void storeHeapAllocation(GraphicsAllocation *heapAllocation);
|
||||
|
||||
protected:
|
||||
InternalAllocationStorage *storageForReuse = nullptr;
|
||||
MemoryManager *memManager = nullptr;
|
||||
bool isMultiOsContextCapable = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
20
shared/source/helpers/hw_cmds.h
Normal file
20
shared/source/helpers/hw_cmds.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifdef SUPPORT_GEN8
|
||||
#include "gen8/hw_cmds.h"
|
||||
#endif
|
||||
#ifdef SUPPORT_GEN9
|
||||
#include "gen9/hw_cmds.h"
|
||||
#endif
|
||||
#ifdef SUPPORT_GEN11
|
||||
#include "gen11/hw_cmds.h"
|
||||
#endif
|
||||
#ifdef SUPPORT_GEN12LP
|
||||
#include "gen12lp/hw_cmds.h"
|
||||
#endif
|
||||
52
shared/source/helpers/hw_helper.cpp
Normal file
52
shared/source/helpers/hw_helper.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/hw_helper.h"
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
HwHelper *hwHelperFactory[IGFX_MAX_CORE] = {};
|
||||
|
||||
// Looks up the helper registered in hwHelperFactory for the given core family.
// The table entry is dereferenced without a null check.
HwHelper &HwHelper::get(GFXCORE_FAMILY gfxCore) {
    HwHelper *registeredHelper = hwHelperFactory[gfxCore];
    return *registeredHelper;
}
|
||||
|
||||
bool HwHelper::renderCompressedBuffersSupported(const HardwareInfo &hwInfo) {
|
||||
if (DebugManager.flags.RenderCompressedBuffersEnabled.get() != -1) {
|
||||
return !!DebugManager.flags.RenderCompressedBuffersEnabled.get();
|
||||
}
|
||||
return hwInfo.capabilityTable.ftrRenderCompressedBuffers;
|
||||
}
|
||||
|
||||
bool HwHelper::renderCompressedImagesSupported(const HardwareInfo &hwInfo) {
|
||||
if (DebugManager.flags.RenderCompressedImagesEnabled.get() != -1) {
|
||||
return !!DebugManager.flags.RenderCompressedImagesEnabled.get();
|
||||
}
|
||||
return hwInfo.capabilityTable.ftrRenderCompressedImages;
|
||||
}
|
||||
|
||||
bool HwHelper::cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo) {
|
||||
int32_t dbgFlag = DebugManager.flags.EnableCacheFlushAfterWalker.get();
|
||||
if (dbgFlag == 1) {
|
||||
return true;
|
||||
} else if (dbgFlag == 0) {
|
||||
return false;
|
||||
}
|
||||
return hwInfo.capabilityTable.supportCacheFlushAfterWalker;
|
||||
}
|
||||
|
||||
uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
|
||||
uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
|
||||
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
|
||||
}
|
||||
|
||||
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
|
||||
return maxNumEUsPerSubSlice * numThreadsPerEU;
|
||||
}
|
||||
} // namespace NEO
|
||||
273
shared/source/helpers/hw_helper.h
Normal file
273
shared/source/helpers/hw_helper.h
Normal file
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "command_stream/linear_stream.h"
|
||||
#include "helpers/aux_translation.h"
|
||||
#include "helpers/hw_cmds.h"
|
||||
#include "opencl/source/built_ins/sip.h"
|
||||
#include "opencl/source/gen_common/aub_mapper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace NEO {
|
||||
class ExecutionEnvironment;
|
||||
class GraphicsAllocation;
|
||||
struct HardwareCapabilities;
|
||||
class GmmHelper;
|
||||
|
||||
class HwHelper {
|
||||
public:
|
||||
static HwHelper &get(GFXCORE_FAMILY gfxCore);
|
||||
virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0;
|
||||
virtual size_t getBindingTableStateSize() const = 0;
|
||||
virtual uint32_t getBindingTableStateAlignement() const = 0;
|
||||
virtual size_t getInterfaceDescriptorDataSize() const = 0;
|
||||
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
|
||||
virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0;
|
||||
virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) = 0;
|
||||
virtual void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) = 0;
|
||||
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
|
||||
virtual void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) = 0;
|
||||
virtual bool isL3Configurable(const HardwareInfo &hwInfo) = 0;
|
||||
virtual SipKernelType getSipKernelType(bool debuggingActive) = 0;
|
||||
virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0;
|
||||
virtual bool hvAlign4Required() const = 0;
|
||||
virtual bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const = 0;
|
||||
virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0;
|
||||
static bool renderCompressedBuffersSupported(const HardwareInfo &hwInfo);
|
||||
static bool renderCompressedImagesSupported(const HardwareInfo &hwInfo);
|
||||
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
|
||||
virtual bool timestampPacketWriteSupported() const = 0;
|
||||
virtual size_t getRenderSurfaceStateSize() const = 0;
|
||||
virtual void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
bool isReadOnly,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) = 0;
|
||||
virtual const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const = 0;
|
||||
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
|
||||
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual std::string getExtensions() const = 0;
|
||||
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
|
||||
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool requiresAuxResolves() const = 0;
|
||||
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
||||
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
||||
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
|
||||
virtual uint32_t getMinimalSIMDSize() = 0;
|
||||
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
|
||||
|
||||
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
||||
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
||||
|
||||
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
|
||||
static constexpr uint32_t internalUsageEngineIndex = 2;
|
||||
|
||||
protected:
|
||||
HwHelper() = default;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
class HwHelperHw : public HwHelper {
|
||||
public:
|
||||
static HwHelper &get() {
|
||||
static HwHelperHw<GfxFamily> hwHelper;
|
||||
return hwHelper;
|
||||
}
|
||||
|
||||
static const aub_stream::EngineType lowPriorityEngineType;
|
||||
|
||||
uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
|
||||
const BINDING_TABLE_STATE *bindingTableState = static_cast<const BINDING_TABLE_STATE *>(pBindingTable);
|
||||
return bindingTableState[index].getRawData(0);
|
||||
}
|
||||
|
||||
size_t getBindingTableStateSize() const override {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
return sizeof(BINDING_TABLE_STATE);
|
||||
}
|
||||
|
||||
uint32_t getBindingTableStateAlignement() const override {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
|
||||
}
|
||||
|
||||
size_t getInterfaceDescriptorDataSize() const override {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
return sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
}
|
||||
|
||||
size_t getRenderSurfaceStateSize() const override {
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
return sizeof(RENDER_SURFACE_STATE);
|
||||
}
|
||||
|
||||
const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
|
||||
|
||||
size_t getMaxBarrierRegisterPerSlice() const override;
|
||||
|
||||
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
|
||||
|
||||
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
|
||||
|
||||
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;
|
||||
|
||||
void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) override;
|
||||
|
||||
void adjustDefaultEngineType(HardwareInfo *pHwInfo) override;
|
||||
|
||||
void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) override;
|
||||
|
||||
bool isL3Configurable(const HardwareInfo &hwInfo) override;
|
||||
|
||||
SipKernelType getSipKernelType(bool debuggingActive) override;
|
||||
|
||||
bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool hvAlign4Required() const override;
|
||||
|
||||
bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const override;
|
||||
|
||||
bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override;
|
||||
|
||||
bool timestampPacketWriteSupported() const override;
|
||||
|
||||
bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
bool isReadOnly,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) override;
|
||||
|
||||
const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const override;
|
||||
|
||||
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
|
||||
|
||||
bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
std::string getExtensions() const override;
|
||||
|
||||
uint32_t getMetricsLibraryGenId() const override;
|
||||
|
||||
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
|
||||
|
||||
bool requiresAuxResolves() const override;
|
||||
|
||||
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
|
||||
|
||||
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
|
||||
|
||||
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) override;
|
||||
|
||||
static AuxTranslationMode getAuxTranslationMode();
|
||||
|
||||
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
|
||||
|
||||
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
|
||||
|
||||
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
|
||||
|
||||
uint32_t getMinimalSIMDSize() override;
|
||||
|
||||
protected:
|
||||
static const AuxTranslationMode defaultAuxTranslationMode;
|
||||
HwHelperHw() = default;
|
||||
};
|
||||
|
||||
// Builds a 32-bit value with a single bit (and optionally its write-mask bit,
// 16 positions higher) set on top of an initial value.
struct DwordBuilder {
    // bitNumberToSet - index of the bit to set; must be < 32, and < 16 when
    //                  masked is true so that bitNumberToSet + 16 stays in range.
    // masked         - when true, bit (bitNumberToSet + 16) is set as well.
    // set            - when false, bit bitNumberToSet itself is left untouched.
    // initValue      - starting value the bits are OR-ed into.
    //
    // Returns the resulting dword.
    static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) {
        uint32_t dword = initValue;
        // Shift an unsigned 1: shifting a signed int into bit 31 is undefined behaviour.
        if (set) {
            dword |= (1u << bitNumberToSet);
        }
        if (masked) {
            dword |= (1u << (bitNumberToSet + 16));
        }
        return dword;
    }
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct LriHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
static MI_LOAD_REGISTER_IMM *program(LinearStream *cmdStream, uint32_t address, uint32_t value) {
|
||||
auto lri = (MI_LOAD_REGISTER_IMM *)cmdStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*lri = GfxFamily::cmdInitLoadRegisterImm;
|
||||
lri->setRegisterOffset(address);
|
||||
lri->setDataDword(value);
|
||||
return lri;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct MemorySynchronizationCommands {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
|
||||
POST_SYNC_OPERATION operation,
|
||||
uint64_t gpuAddress,
|
||||
uint64_t immediateData,
|
||||
bool dcFlush, const HardwareInfo &hwInfo);
|
||||
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
|
||||
static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
|
||||
static void setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo);
|
||||
static PIPE_CONTROL *addPipeControl(LinearStream &commandStream, bool dcFlush);
|
||||
static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo);
|
||||
static size_t getSizeForSinglePipeControl();
|
||||
static size_t getSizeForSingleSynchronization(const HardwareInfo &hwInfo);
|
||||
static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
|
||||
|
||||
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
|
||||
static size_t getSizeForFullCacheFlush();
|
||||
static void setExtraCacheFlushFields(PIPE_CONTROL *pipeControl);
|
||||
|
||||
protected:
|
||||
static PIPE_CONTROL *obtainPipeControl(LinearStream &commandStream, bool dcFlush);
|
||||
};
|
||||
|
||||
union SURFACE_STATE_BUFFER_LENGTH {
|
||||
uint32_t Length;
|
||||
struct SurfaceState {
|
||||
uint32_t Width : BITFIELD_RANGE(0, 6);
|
||||
uint32_t Height : BITFIELD_RANGE(7, 20);
|
||||
uint32_t Depth : BITFIELD_RANGE(21, 31);
|
||||
} SurfaceState;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
320
shared/source/helpers/hw_helper_base.inl
Normal file
320
shared/source/helpers/hw_helper_base.inl
Normal file
@@ -0,0 +1,320 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "execution_environment/execution_environment.h"
|
||||
#include "gmm_helper/gmm.h"
|
||||
#include "gmm_helper/gmm_helper.h"
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/hw_info.h"
|
||||
#include "helpers/preamble.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
#include "memory_manager/memory_constants.h"
|
||||
#include "os_interface/os_interface.h"
|
||||
#include "opencl/source/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
#include "instrumentation.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename Family>
|
||||
const aub_stream::EngineType HwHelperHw<Family>::lowPriorityEngineType = aub_stream::EngineType::ENGINE_RCS;
|
||||
|
||||
template <typename Family>
|
||||
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin;
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const {
|
||||
return size > KB;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) {
|
||||
caps->image3DMaxHeight = 16384;
|
||||
caps->image3DMaxWidth = 16384;
|
||||
//With statefull messages we have an allocation cap of 4GB
|
||||
//Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching..
|
||||
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
|
||||
caps->isStatelesToStatefullWithOffsetSupported = true;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) {
|
||||
return PreambleHelper<Family>::isL3Configurable(hwInfo);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
|
||||
if (!debuggingActive) {
|
||||
return SipKernelType::Csr;
|
||||
}
|
||||
return SipKernelType::DbgCsr;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
|
||||
return 32;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) {
|
||||
return 4u;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const {
|
||||
return *AUBFamilyMapper<Family>::csTraits[engineType];
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::isPageTableManagerSupported(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
size_t bufferSize,
|
||||
uint64_t gpuVa,
|
||||
size_t offset,
|
||||
uint32_t pitch,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
bool isReadOnly,
|
||||
uint32_t surfaceType,
|
||||
bool forceNonAuxMode) {
|
||||
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
|
||||
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
||||
auto gmmHelper = executionEnvironment.getGmmHelper();
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);
|
||||
*surfaceState = Family::cmdInitRenderSurfaceState;
|
||||
auto surfaceSize = alignUp(bufferSize, 4);
|
||||
|
||||
SURFACE_STATE_BUFFER_LENGTH Length = {0};
|
||||
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
|
||||
|
||||
surfaceState->setWidth(Length.SurfaceState.Width + 1);
|
||||
surfaceState->setHeight(Length.SurfaceState.Height + 1);
|
||||
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
|
||||
if (pitch) {
|
||||
surfaceState->setSurfacePitch(pitch);
|
||||
}
|
||||
|
||||
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
|
||||
auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
|
||||
bufferStateAddress += offset;
|
||||
|
||||
auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;
|
||||
|
||||
surfaceState->setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));
|
||||
|
||||
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
|
||||
surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
|
||||
surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
|
||||
|
||||
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
|
||||
surfaceState->setVerticalLineStride(0);
|
||||
surfaceState->setVerticalLineStrideOffset(0);
|
||||
if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
|
||||
isReadOnly) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
|
||||
} else {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
}
|
||||
|
||||
surfaceState->setSurfaceBaseAddress(bufferStateAddress);
|
||||
|
||||
Gmm *gmm = gfxAlloc ? gfxAlloc->getDefaultGmm() : nullptr;
|
||||
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
|
||||
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAlloc->getAllocationType()) {
|
||||
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
|
||||
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
|
||||
} else {
|
||||
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
|
||||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const {
|
||||
if (DebugManager.flags.EnableLocalMemory.get() != -1) {
|
||||
return DebugManager.flags.EnableLocalMemory.get();
|
||||
} else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
|
||||
if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
|
||||
return static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
|
||||
}
|
||||
|
||||
return HwHelperHw<Family>::defaultAuxTranslationMode;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
|
||||
hwInfo.capabilityTable.blitterOperationsSupported &&
|
||||
multiDispatchInfo.getMemObjsForAuxTranslation() &&
|
||||
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
|
||||
addPipeControlWA(commandStream, gpuAddress, hwInfo);
|
||||
|
||||
auto pipeControl = obtainPipeControl(commandStream, dcFlush);
|
||||
pipeControl->setPostSyncOperation(operation);
|
||||
pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
|
||||
pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
|
||||
pipeControl->setDcFlushEnable(dcFlush);
|
||||
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
pipeControl->setImmediateData(immediateData);
|
||||
}
|
||||
|
||||
setExtraPipeControlProperties(*pipeControl, hwInfo);
|
||||
|
||||
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
|
||||
|
||||
return pipeControl;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(LinearStream &commandStream, bool dcFlush) {
|
||||
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pCmd = GfxFamily::cmdInitPipeControl;
|
||||
pCmd->setCommandStreamerStallEnable(true);
|
||||
pCmd->setDcFlushEnable(dcFlush);
|
||||
|
||||
if (DebugManager.flags.FlushAllCaches.get()) {
|
||||
pCmd->setDcFlushEnable(true);
|
||||
pCmd->setRenderTargetCacheFlushEnable(true);
|
||||
pCmd->setInstructionCacheInvalidateEnable(true);
|
||||
pCmd->setTextureCacheInvalidationEnable(true);
|
||||
pCmd->setPipeControlFlushEnable(true);
|
||||
pCmd->setVfCacheInvalidationEnable(true);
|
||||
pCmd->setConstantCacheInvalidationEnable(true);
|
||||
pCmd->setStateCacheInvalidationEnable(true);
|
||||
}
|
||||
return pCmd;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
|
||||
return MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, dcFlush);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) {
|
||||
const auto pipeControlCount = HardwareCommandsHelper<GfxFamily>::isPipeControlWArequired(hwInfo) ? 2u : 1u;
|
||||
return pipeControlCount * getSizeForSinglePipeControl() + getSizeForAdditonalSynchronization(hwInfo);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
|
||||
if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
|
||||
return false;
|
||||
}
|
||||
return !isImage1d;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
|
||||
return HardwareCommandsHelper<GfxFamily>::alignSlmSize(slmSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
|
||||
return hasBarriers;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
|
||||
return 8u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
|
||||
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, true);
|
||||
|
||||
pipeControl->setRenderTargetCacheFlushEnable(true);
|
||||
pipeControl->setInstructionCacheInvalidateEnable(true);
|
||||
pipeControl->setTextureCacheInvalidationEnable(true);
|
||||
pipeControl->setPipeControlFlushEnable(true);
|
||||
pipeControl->setConstantCacheInvalidationEnable(true);
|
||||
pipeControl->setStateCacheInvalidationEnable(true);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(pipeControl);
|
||||
|
||||
return pipeControl;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const StackVec<size_t, 3> HwHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
|
||||
return {8, 16, 32};
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
96
shared/source/helpers/hw_helper_bdw_plus.inl
Normal file
96
shared/source/helpers/hw_helper_bdw_plus.inl
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gmm_helper/gmm_helper.h"
|
||||
#include "helpers/hw_helper_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HwHelperHw<GfxFamily>::adjustDefaultEngineType(HardwareInfo *pHwInfo) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
|
||||
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice *
|
||||
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
|
||||
coherencyFlag = true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::hvAlign4Required() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const std::vector<aub_stream::EngineType> HwHelperHw<GfxFamily>::getGpgpuEngineInstances() const {
|
||||
constexpr std::array<aub_stream::EngineType, 3> gpgpuEngineInstances = {{aub_stream::ENGINE_RCS,
|
||||
aub_stream::ENGINE_RCS, // low priority
|
||||
aub_stream::ENGINE_RCS}}; // internal usage
|
||||
return std::vector<aub_stream::EngineType>(gpgpuEngineInstances.begin(), gpgpuEngineInstances.end());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
std::string HwHelperHw<GfxFamily>::getExtensions() const {
|
||||
return "";
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
|
||||
if (l3enabled) {
|
||||
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
|
||||
}
|
||||
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
return threadsPerEu * euCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleSynchronization(const HardwareInfo &hwInfo) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL *pipeControl) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
21
shared/source/helpers/hw_helper_extended.cpp
Normal file
21
shared/source/helpers/hw_helper_extended.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
uint32_t HwHelper::getSubDevicesCount(const HardwareInfo *pHwInfo) {
|
||||
return DebugManager.flags.CreateMultipleSubDevices.get() > 0 ? DebugManager.flags.CreateMultipleSubDevices.get() : 1u;
|
||||
}
|
||||
|
||||
uint32_t HwHelper::getEnginesCount(const HardwareInfo &hwInfo) {
|
||||
return 1u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
122
shared/source/helpers/hw_info.cpp
Normal file
122
shared/source/helpers/hw_info.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/hw_info.h"
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
#include "helpers/hw_cmds.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
// Builds a HardwareInfo by copying every pointed-to table and the capability table.
// All pointer arguments are dereferenced unconditionally, so they must not be null.
HardwareInfo::HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
                           const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable)
    : platform(*platform),
      featureTable(*featureTable),
      workaroundTable(*workaroundTable),
      gtSystemInfo(*gtSystemInfo),
      capabilityTable(capabilityTable) {
}
|
||||
|
||||
// Global table of hardware prefixes
|
||||
const char *hardwarePrefix[IGFX_MAX_PRODUCT] = {
|
||||
nullptr,
|
||||
};
|
||||
|
||||
// Global table of default hardware info configs
|
||||
uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT] = {
|
||||
0x0,
|
||||
};
|
||||
|
||||
// Global table of family names
|
||||
const char *familyName[IGFX_MAX_CORE] = {
|
||||
nullptr,
|
||||
};
|
||||
// Global table of family names
|
||||
bool familyEnabled[IGFX_MAX_CORE] = {
|
||||
false,
|
||||
};
|
||||
|
||||
const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT] = {};
|
||||
void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *, bool, uint64_t) = {
|
||||
0x0,
|
||||
};
|
||||
|
||||
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn) {
|
||||
std::transform(platform.begin(), platform.end(), platform.begin(), ::tolower);
|
||||
|
||||
bool ret = false;
|
||||
for (int j = 0; j < IGFX_MAX_PRODUCT; j++) {
|
||||
if (hardwarePrefix[j] == nullptr)
|
||||
continue;
|
||||
if (hardwarePrefix[j] == platform) {
|
||||
hwInfoIn = hardwareInfoTable[j];
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn) {
|
||||
uint32_t sliceCount = static_cast<uint16_t>(hwInfoConfig >> 32);
|
||||
uint32_t subSlicePerSliceCount = static_cast<uint16_t>(hwInfoConfig >> 16);
|
||||
uint32_t euPerSubSliceCount = static_cast<uint16_t>(hwInfoConfig);
|
||||
|
||||
hwInfoIn.gtSystemInfo.SliceCount = sliceCount;
|
||||
hwInfoIn.gtSystemInfo.SubSliceCount = subSlicePerSliceCount * sliceCount;
|
||||
hwInfoIn.gtSystemInfo.EUCount = euPerSubSliceCount * subSlicePerSliceCount * sliceCount;
|
||||
}
|
||||
|
||||
// Parses a "<slices>x<subSlicesPerSlice>x<eusPerSubSlice>" string into a packed config.
//
//   hwInfoConfigStr - text to parse, e.g. "1x3x8"
//   hwInfoConfig    - receives (slices << 32) | (subSlicesPerSlice << 16) | eusPerSubSlice;
//                     reset to 0 on entry, so it is 0 whenever false is returned
//
// Returns false when a separator is missing, or when any field, the total sub-slice count
// or the total EU count exceeds the uint16_t range.
// std::stoul is used for the numeric fields, so a field that does not start with a number
// (or does not fit in unsigned long) still raises std::invalid_argument / std::out_of_range.
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig) {
    hwInfoConfig = 0u;

    // Range checks are done on the unsigned long returned by std::stoul, BEFORE narrowing
    // to uint32_t; otherwise a value such as 2^32 + 1 would wrap to 1 and be accepted.
    constexpr unsigned long maxFieldValue = std::numeric_limits<uint16_t>::max();

    size_t currPos = hwInfoConfigStr.find('x', 0);
    if (currPos == std::string::npos) {
        return false;
    }
    const unsigned long parsedSliceCount = std::stoul(hwInfoConfigStr.substr(0, currPos));
    if (parsedSliceCount > maxFieldValue) {
        return false;
    }
    const uint32_t sliceCount = static_cast<uint32_t>(parsedSliceCount);
    size_t prevPos = currPos + 1;

    currPos = hwInfoConfigStr.find('x', prevPos);
    if (currPos == std::string::npos) {
        return false;
    }
    // substr takes (position, count), so the field length is currPos - prevPos.
    const unsigned long parsedSubSlicePerSliceCount = std::stoul(hwInfoConfigStr.substr(prevPos, currPos - prevPos));
    if (parsedSubSlicePerSliceCount > maxFieldValue) {
        return false;
    }
    const uint32_t subSlicePerSliceCount = static_cast<uint32_t>(parsedSubSlicePerSliceCount);
    // Both factors fit in 16 bits, so the 32-bit product cannot overflow.
    const uint32_t subSliceCount = subSlicePerSliceCount * sliceCount;
    if (subSliceCount > maxFieldValue) {
        return false;
    }
    prevPos = currPos + 1;

    const unsigned long parsedEuPerSubSliceCount = std::stoul(hwInfoConfigStr.substr(prevPos));
    if (parsedEuPerSubSliceCount > maxFieldValue) {
        return false;
    }
    const uint32_t euPerSubSliceCount = static_cast<uint32_t>(parsedEuPerSubSliceCount);
    const uint32_t euCount = euPerSubSliceCount * subSliceCount;
    if (euCount > maxFieldValue) {
        return false;
    }

    hwInfoConfig = static_cast<uint64_t>(sliceCount & 0xffff) << 32 |
                   static_cast<uint64_t>(subSlicePerSliceCount & 0xffff) << 16 |
                   static_cast<uint64_t>(euPerSubSliceCount & 0xffff);
    return true;
}
|
||||
|
||||
// Returns the engine type to use: the NodeOrdinal debug flag when it is not -1,
// otherwise the platform's default engine type from the capability table.
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo) {
    const auto forcedOrdinal = DebugManager.flags.NodeOrdinal.get();
    if (forcedOrdinal == -1) {
        return hwInfo.capabilityTable.defaultEngineType;
    }
    return static_cast<aub_stream::EngineType>(forcedOrdinal);
}
|
||||
|
||||
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo) {
|
||||
std::string platformName = familyName[hwInfo.platform.eRenderCoreFamily];
|
||||
platformName.append(hwInfo.capabilityTable.platformType);
|
||||
return platformName;
|
||||
}
|
||||
} // namespace NEO
|
||||
115
shared/source/helpers/hw_info.h
Normal file
115
shared/source/helpers/hw_info.h
Normal file
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "command_stream/preemption_mode.h"
|
||||
#include "direct_submission/direct_submission_properties.h"
|
||||
#include "helpers/kmd_notify_properties.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
#include "gtsysinfo.h"
|
||||
#include "igfxfmid.h"
|
||||
#include "sku_info.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct RuntimeCapabilityTable {
|
||||
DirectSubmissionProperyEngines directSubmissionEngines;
|
||||
KmdNotifyProperties kmdNotifyProperties;
|
||||
uint64_t gpuAddressSpace;
|
||||
double defaultProfilingTimerResolution;
|
||||
size_t requiredPreemptionSurfaceSize;
|
||||
bool (*isSimulation)(unsigned short);
|
||||
PreemptionMode defaultPreemptionMode;
|
||||
aub_stream::EngineType defaultEngineType;
|
||||
uint32_t maxRenderFrequency;
|
||||
unsigned int clVersionSupport;
|
||||
uint32_t aubDeviceId;
|
||||
uint32_t extraQuantityThreadsPerEU;
|
||||
uint32_t slmSize;
|
||||
uint32_t grfSize;
|
||||
bool blitterOperationsSupported;
|
||||
bool ftrSupportsInteger64BitAtomics;
|
||||
bool ftrSupportsFP64;
|
||||
bool ftrSupports64BitMath;
|
||||
bool ftrSvm;
|
||||
bool ftrSupportsCoherency;
|
||||
bool ftrSupportsVmeAvcTextureSampler;
|
||||
bool ftrSupportsVmeAvcPreemption;
|
||||
bool ftrRenderCompressedBuffers;
|
||||
bool ftrRenderCompressedImages;
|
||||
bool ftr64KBpages;
|
||||
bool instrumentationEnabled;
|
||||
bool forceStatelessCompilationFor32Bit;
|
||||
const char *platformType;
|
||||
bool debuggerSupported;
|
||||
bool supportsVme;
|
||||
bool supportCacheFlushAfterWalker;
|
||||
bool supportsImages;
|
||||
bool supportsDeviceEnqueue;
|
||||
bool hostPtrTrackingEnabled;
|
||||
};
|
||||
|
||||
struct HardwareCapabilities {
|
||||
size_t image3DMaxWidth;
|
||||
size_t image3DMaxHeight;
|
||||
uint64_t maxMemAllocSize;
|
||||
bool isStatelesToStatefullWithOffsetSupported;
|
||||
};
|
||||
|
||||
struct HardwareInfo {
|
||||
HardwareInfo() = default;
|
||||
HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
|
||||
const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable);
|
||||
|
||||
PLATFORM platform = {};
|
||||
FeatureTable featureTable = {};
|
||||
WorkaroundTable workaroundTable = {};
|
||||
alignas(4) GT_SYSTEM_INFO gtSystemInfo = {};
|
||||
|
||||
RuntimeCapabilityTable capabilityTable = {};
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY product>
|
||||
struct HwMapper {};
|
||||
|
||||
template <GFXCORE_FAMILY gfxFamily>
|
||||
struct GfxFamilyMapper {};
|
||||
|
||||
// Global table of hardware prefixes
|
||||
extern bool familyEnabled[IGFX_MAX_CORE];
|
||||
extern const char *familyName[IGFX_MAX_CORE];
|
||||
extern const char *hardwarePrefix[IGFX_MAX_PRODUCT];
|
||||
extern uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT];
|
||||
extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT];
|
||||
extern void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig);
|
||||
|
||||
template <GFXCORE_FAMILY gfxFamily>
|
||||
struct EnableGfxFamilyHw {
|
||||
EnableGfxFamilyHw() {
|
||||
familyEnabled[gfxFamily] = true;
|
||||
familyName[gfxFamily] = GfxFamilyMapper<gfxFamily>::name;
|
||||
}
|
||||
};
|
||||
|
||||
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn);
|
||||
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn);
|
||||
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig);
|
||||
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo);
|
||||
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo);
|
||||
|
||||
// Utility conversion
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
struct ToGfxCoreFamily {
|
||||
static const GFXCORE_FAMILY gfxCoreFamily =
|
||||
static_cast<GFXCORE_FAMILY>(NEO::HwMapper<productFamily>::gfxFamily);
|
||||
static constexpr GFXCORE_FAMILY get() { return gfxCoreFamily; }
|
||||
};
|
||||
} // namespace NEO
|
||||
19
shared/source/helpers/interlocked_max.h
Normal file
19
shared/source/helpers/interlocked_max.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
// Atomically raises `dest` to `newVal` when `newVal` is larger; otherwise leaves it unchanged.
//
//   dest   - atomic value to update
//   newVal - candidate maximum
//
// compare_exchange_weak writes the currently stored value back into `observed` when it
// fails, so no separate reload is needed between attempts. The loop ends as soon as the
// stored value is already >= newVal, which also avoids issuing a store when nothing changes.
template <typename Type>
void interlockedMax(std::atomic<Type> &dest, Type newVal) {
    Type observed = dest.load();
    while (observed < newVal && !dest.compare_exchange_weak(observed, newVal)) {
    }
}
|
||||
44
shared/source/helpers/kernel_helpers.cpp
Normal file
44
shared/source/helpers/kernel_helpers.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/kernel_helpers.h"
|
||||
|
||||
#include "helpers/basic_math.h"
|
||||
#include "helpers/debug_helpers.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize) {
|
||||
UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
|
||||
UNRECOVERABLE_IF(localWorkSize == nullptr);
|
||||
|
||||
size_t workGroupSize = localWorkSize[0];
|
||||
for (uint32_t i = 1; i < workDim; i++) {
|
||||
workGroupSize *= localWorkSize[i];
|
||||
}
|
||||
|
||||
auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
|
||||
auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
|
||||
|
||||
if (numberOfBarriers > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
||||
}
|
||||
|
||||
if (usedSlmSize > 0) {
|
||||
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
||||
}
|
||||
|
||||
return maxWorkGroupsCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
19
shared/source/helpers/kernel_helpers.h
Normal file
19
shared/source/helpers/kernel_helpers.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Include guard: without it, including this header twice in one translation unit
// redefines struct KernelHelper.
#pragma once

#include <cstddef>
#include <cstdint>

namespace NEO {

// Stateless helper with kernel-related calculations.
struct KernelHelper {
    // Computes the maximum number of work groups from the available thread count,
    // barrier usage and SLM usage. `localWorkSize` points to `workDim` entries.
    static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
                                         uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
                                         const size_t *localWorkSize);
};

} // namespace NEO
|
||||
78
shared/source/helpers/kmd_notify_properties.cpp
Normal file
78
shared/source/helpers/kmd_notify_properties.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/kmd_notify_properties.h"
|
||||
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Computes the timeout for waiting on a task and reports whether a KMD wait should be used.
//
//   timeoutValueOutput   - receives the timeout; untouched when flushStampToWait is 0
//   quickKmdSleepRequest - caller's request for the shorter "quick sleep" delay
//   currentHwTag         - current hardware tag value
//   taskCountToWait      - task count being waited for
//   flushStampToWait     - flush stamp to wait on; 0 means there is nothing to wait on
//   forcePowerSavingMode - forces the minimal timeout regardless of the debug flag
//
// Returns false when there is no flush stamp; true in power saving mode; otherwise true
// when KMD notify is enabled or the AC line is disconnected.
bool KmdNotifyHelper::obtainTimeoutParams(int64_t &timeoutValueOutput,
                                          bool quickKmdSleepRequest,
                                          uint32_t currentHwTag,
                                          uint32_t taskCountToWait,
                                          FlushStamp flushStampToWait,
                                          bool forcePowerSavingMode) {
    if (flushStampToWait == 0) {
        return false;
    }

    const bool powerSaving = DebugManager.flags.PowerSavingMode.get() || forcePowerSavingMode;
    if (powerSaving) {
        timeoutValueOutput = 1;
        return true;
    }

    // The base timeout scales with the number of tasks still outstanding.
    int64_t multiplier = 1;
    if (currentHwTag < taskCountToWait) {
        multiplier = static_cast<int64_t>(taskCountToWait - currentHwTag);
    }

    // The AC line status is refreshed only when KMD notify is off and the backlog is large enough.
    if (!properties->enableKmdNotify && multiplier > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine) {
        updateAcLineStatus();
    }

    quickKmdSleepRequest |= applyQuickKmdSleepForSporadicWait();

    if (!properties->enableKmdNotify && !acLineConnected) {
        timeoutValueOutput = KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine;
    } else if (quickKmdSleepRequest && properties->enableQuickKmdSleep) {
        timeoutValueOutput = properties->delayQuickKmdSleepMicroseconds;
    } else {
        timeoutValueOutput = getBaseTimeout(multiplier);
    }

    const bool useKmdWait = properties->enableKmdNotify || !acLineConnected;
    return useKmdWait;
}
|
||||
|
||||
bool KmdNotifyHelper::applyQuickKmdSleepForSporadicWait() const {
|
||||
if (properties->enableQuickKmdSleepForSporadicWaits) {
|
||||
auto timeDiff = getMicrosecondsSinceEpoch() - lastWaitForCompletionTimestampUs.load();
|
||||
if (timeDiff > properties->delayQuickKmdSleepForSporadicWaitsMicroseconds) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Records the current time (microseconds since epoch) as the moment of the last wait.
void KmdNotifyHelper::updateLastWaitForCompletionTimestamp() {
    const auto nowUs = getMicrosecondsSinceEpoch();
    lastWaitForCompletionTimestampUs = nowUs;
}
|
||||
|
||||
// Current time of std::chrono::high_resolution_clock, in microseconds since that clock's epoch.
int64_t KmdNotifyHelper::getMicrosecondsSinceEpoch() const {
    using Clock = std::chrono::high_resolution_clock;
    const auto sinceEpoch = Clock::now().time_since_epoch();
    const auto asMicroseconds = std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch);
    return asMicroseconds.count();
}
|
||||
|
||||
// Overwrites `destination` with the debug variable's value, but only when that
// value is non-negative; a negative value leaves `destination` untouched.
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination) {
    if (debugVariableValue < 0) {
        return;
    }
    destination = static_cast<int64_t>(debugVariableValue);
}
|
||||
|
||||
// Boolean flavour of the override: a non-negative debug value replaces
// `destination` with "value is non-zero"; a negative value changes nothing.
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, bool &destination) {
    if (debugVariableValue < 0) {
        return;
    }
    destination = (debugVariableValue != 0);
}
|
||||
62
shared/source/helpers/kmd_notify_properties.h
Normal file
62
shared/source/helpers/kmd_notify_properties.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/completion_stamp.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
struct KmdNotifyProperties {
|
||||
int64_t delayKmdNotifyMicroseconds;
|
||||
int64_t delayQuickKmdSleepMicroseconds;
|
||||
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
|
||||
// Main switch for KMD Notify optimization - if it's disabled, all below are disabled too
|
||||
bool enableKmdNotify;
|
||||
// Use smaller delay in specific situations (i.e. from AsyncEventsHandler)
|
||||
bool enableQuickKmdSleep;
|
||||
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
|
||||
bool enableQuickKmdSleepForSporadicWaits;
|
||||
};
|
||||
|
||||
namespace KmdNotifyConstants {
|
||||
constexpr int64_t timeoutInMicrosecondsForDisconnectedAcLine = 10000;
|
||||
constexpr uint32_t minimumTaskCountDiffToCheckAcLine = 10;
|
||||
} // namespace KmdNotifyConstants
|
||||
|
||||
class KmdNotifyHelper {
|
||||
public:
|
||||
KmdNotifyHelper() = delete;
|
||||
KmdNotifyHelper(const KmdNotifyProperties *properties) : properties(properties){};
|
||||
MOCKABLE_VIRTUAL ~KmdNotifyHelper() = default;
|
||||
|
||||
bool obtainTimeoutParams(int64_t &timeoutValueOutput,
|
||||
bool quickKmdSleepRequest,
|
||||
uint32_t currentHwTag,
|
||||
uint32_t taskCountToWait,
|
||||
FlushStamp flushStampToWait,
|
||||
bool forcePowerSavingMode);
|
||||
|
||||
bool quickKmdSleepForSporadicWaitsEnabled() const { return properties->enableQuickKmdSleepForSporadicWaits; }
|
||||
MOCKABLE_VIRTUAL void updateLastWaitForCompletionTimestamp();
|
||||
MOCKABLE_VIRTUAL void updateAcLineStatus();
|
||||
|
||||
static void overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination);
|
||||
static void overrideFromDebugVariable(int32_t debugVariableValue, bool &destination);
|
||||
|
||||
protected:
|
||||
bool applyQuickKmdSleepForSporadicWait() const;
|
||||
int64_t getBaseTimeout(const int64_t &multiplier) const;
|
||||
int64_t getMicrosecondsSinceEpoch() const;
|
||||
|
||||
const KmdNotifyProperties *properties = nullptr;
|
||||
std::atomic<int64_t> lastWaitForCompletionTimestampUs{0};
|
||||
std::atomic<bool> acLineConnected{true};
|
||||
};
|
||||
} // namespace NEO
|
||||
29
shared/source/helpers/non_copyable_or_moveable.h
Normal file
29
shared/source/helpers/non_copyable_or_moveable.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
namespace NEO {
|
||||
// Default-constructible type whose copy and move operations are all deleted.
class NonCopyableOrMovableClass {
  public:
    NonCopyableOrMovableClass() = default;

    // Construction from another instance is disabled, by copy and by move.
    NonCopyableOrMovableClass(const NonCopyableOrMovableClass &) = delete;
    NonCopyableOrMovableClass(NonCopyableOrMovableClass &&) = delete;

    // Assignment from another instance is disabled as well.
    NonCopyableOrMovableClass &operator=(const NonCopyableOrMovableClass &) = delete;
    NonCopyableOrMovableClass &operator=(NonCopyableOrMovableClass &&) = delete;
};
|
||||
|
||||
// Default-constructible type that can be moved but never copied.
class NonCopyableClass {
  public:
    NonCopyableClass() = default;

    // Moving is allowed.
    NonCopyableClass(NonCopyableClass &&) = default;
    NonCopyableClass &operator=(NonCopyableClass &&) = default;

    // Copying is disabled.
    NonCopyableClass(const NonCopyableClass &) = delete;
    NonCopyableClass &operator=(const NonCopyableClass &) = delete;
};
|
||||
} // namespace NEO
|
||||
37
shared/source/helpers/options.h
Normal file
37
shared/source/helpers/options.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
#ifndef KMD_PROFILING
|
||||
#define KMD_PROFILING 0
|
||||
#endif
|
||||
|
||||
namespace NEO {
|
||||
enum CommandStreamReceiverType {
|
||||
// Use receiver for real HW
|
||||
CSR_HW = 0,
|
||||
// Capture an AUB file automatically for all traffic going through Device -> CommandStreamReceiver
|
||||
CSR_AUB,
|
||||
// Capture an AUB and tunnel all commands going through Device -> CommandStreamReceiver to a TBX server
|
||||
CSR_TBX,
|
||||
// Use receiver for real HW and capture AUB file
|
||||
CSR_HW_WITH_AUB,
|
||||
// Use TBX server and capture AUB file
|
||||
CSR_TBX_WITH_AUB,
|
||||
// Number of CSR types
|
||||
CSR_TYPES_NUM
|
||||
};
|
||||
|
||||
// AUB file folder location
|
||||
extern const char *folderAUB;
|
||||
|
||||
// Initial value for HW tag
|
||||
// Set to 0 if using HW or simulator, otherwise 0xFFFFFF00, needs to be lower than CompletionStamp::levelNotReady.
|
||||
extern uint32_t initialHardwareTag;
|
||||
} // namespace NEO
|
||||
15
shared/source/helpers/pipeline_select_args.h
Normal file
15
shared/source/helpers/pipeline_select_args.h
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace NEO {
|
||||
struct PipelineSelectArgs {
|
||||
bool specialPipelineSelectMode = false;
|
||||
bool mediaSamplerRequired = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
15
shared/source/helpers/pipeline_select_helper.h
Normal file
15
shared/source/helpers/pipeline_select_helper.h
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;
|
||||
const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10;
|
||||
const uint32_t pipelineSelectMediaSamplerPowerClockGateMaskBits = 0x40;
|
||||
} // namespace NEO
|
||||
98
shared/source/helpers/preamble.h
Normal file
98
shared/source/helpers/preamble.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/pipeline_select_helper.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
#include "igfxfmid.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct HardwareInfo;
|
||||
class Device;
|
||||
struct DispatchFlags;
|
||||
class GraphicsAllocation;
|
||||
class LinearStream;
|
||||
struct PipelineSelectArgs;
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct PreambleHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
|
||||
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream,
|
||||
const PipelineSelectArgs &pipelineSelectArgs,
|
||||
const HardwareInfo &hwInfo);
|
||||
static uint32_t getDefaultThreadArbitrationPolicy();
|
||||
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
|
||||
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);
|
||||
static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, aub_stream::EngineType engineType);
|
||||
static uint64_t programVFEState(LinearStream *pCommandStream,
|
||||
const HardwareInfo &hwInfo,
|
||||
int scratchSize,
|
||||
uint64_t scratchAddress,
|
||||
uint32_t maxFrontEndThreads,
|
||||
aub_stream::EngineType engineType);
|
||||
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
|
||||
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
|
||||
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
|
||||
static void programKernelDebugging(LinearStream *pCommandStream);
|
||||
static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset);
|
||||
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
|
||||
static bool isL3Configurable(const HardwareInfo &hwInfo);
|
||||
static size_t getAdditionalCommandsSize(const Device &device);
|
||||
static size_t getThreadArbitrationCommandsSize();
|
||||
static size_t getVFECommandsSize();
|
||||
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
|
||||
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
|
||||
static uint32_t getUrbEntryAllocationSize();
|
||||
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
|
||||
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY ProductFamily>
|
||||
static uint32_t getL3ConfigHelper(bool useSLM);
|
||||
|
||||
template <PRODUCT_FAMILY ProductFamily>
|
||||
struct L3CNTLREGConfig {
|
||||
static const uint32_t valueForSLM;
|
||||
static const uint32_t valueForNoSLM;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY ProductFamily>
|
||||
uint32_t getL3ConfigHelper(bool useSLM) {
|
||||
if (!useSLM) {
|
||||
return L3CNTLREGConfig<ProductFamily>::valueForNoSLM;
|
||||
}
|
||||
return L3CNTLREGConfig<ProductFamily>::valueForSLM;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct L3CNTLRegisterOffset {
|
||||
static const uint32_t registerOffset;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct DebugModeRegisterOffset {
|
||||
enum {
|
||||
registerOffset = 0x20ec,
|
||||
debugEnabledValue = (1 << 6) | (1 << 22)
|
||||
};
|
||||
};
|
||||
|
||||
// Register offset and value written by PreambleHelper::programKernelDebugging
// through its second MI_LOAD_REGISTER_IMM command.
namespace TdDebugControlRegisterOffset {
// MMIO offset of the register.
constexpr uint32_t registerOffset = 0xe400;
// Value written to the register: bits 4 and 7 set.
constexpr uint32_t debugEnabledValue = (1 << 4) | (1 << 7);
} // namespace TdDebugControlRegisterOffset
|
||||
|
||||
} // namespace NEO
|
||||
118
shared/source/helpers/preamble_base.inl
Normal file
118
shared/source/helpers/preamble_base.inl
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "command_stream/linear_stream.h"
|
||||
#include "command_stream/preemption.h"
|
||||
#include "device/device.h"
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "helpers/hw_cmds.h"
|
||||
#include "helpers/preamble.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
#include "reg_configs_common.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
|
||||
size_t totalSize = PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
|
||||
totalSize += getKernelDebuggingCommandsSize(device.isDebuggerActive());
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo) {
|
||||
size_t size = 0;
|
||||
using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT;
|
||||
size += sizeof(PIPELINE_SELECT);
|
||||
if (HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) {
|
||||
size += sizeof(PIPE_CONTROL);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
|
||||
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {
|
||||
programL3(pCommandStream, l3Config);
|
||||
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
|
||||
programPreemption(pCommandStream, device, preemptionCsr);
|
||||
if (device.isDebuggerActive()) {
|
||||
programKernelDebugging(pCommandStream);
|
||||
}
|
||||
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
|
||||
if (perDssBackedBuffer != nullptr) {
|
||||
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr) {
|
||||
PreemptionHelper::programCsrBaseAddress<GfxFamily>(*pCommandStream, device, preemptionCsr);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programKernelDebugging(LinearStream *pCommandStream) {
|
||||
auto pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
|
||||
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
|
||||
pCmd->setRegisterOffset(DebugModeRegisterOffset<GfxFamily>::registerOffset);
|
||||
pCmd->setDataDword(DebugModeRegisterOffset<GfxFamily>::debugEnabledValue);
|
||||
|
||||
auto pCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
|
||||
*pCmd2 = GfxFamily::cmdInitLoadRegisterImm;
|
||||
pCmd2->setRegisterOffset(TdDebugControlRegisterOffset::registerOffset);
|
||||
pCmd2->setDataDword(TdDebugControlRegisterOffset::debugEnabledValue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(bool debuggingActive) {
|
||||
if (debuggingActive) {
|
||||
return 2 * sizeof(MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool PreambleHelper<GfxFamily>::isL3Configurable(const HardwareInfo &hwInfo) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
63
shared/source/helpers/preamble_bdw_plus.inl
Normal file
63
shared/source/helpers/preamble_bdw_plus.inl
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/flat_batch_buffer_helper.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/preamble_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
|
||||
|
||||
pCmd->setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
|
||||
pCmd->setDataDword(l3Config);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
|
||||
return 0x782;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream,
|
||||
const HardwareInfo &hwInfo,
|
||||
int scratchSize,
|
||||
uint64_t scratchAddress,
|
||||
uint32_t maxFrontEndThreads,
|
||||
aub_stream::EngineType engineType) {
|
||||
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
|
||||
|
||||
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
|
||||
|
||||
auto scratchSpaceAddressOffset = static_cast<uint64_t>(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET);
|
||||
auto pMediaVfeState = reinterpret_cast<MEDIA_VFE_STATE *>(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)));
|
||||
*pMediaVfeState = GfxFamily::cmdInitMediaVfeState;
|
||||
pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads);
|
||||
pMediaVfeState->setNumberOfUrbEntries(1);
|
||||
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
|
||||
pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
|
||||
uint32_t lowAddress = static_cast<uint32_t>(0xFFFFFFFF & scratchAddress);
|
||||
uint32_t highAddress = static_cast<uint32_t>(0xFFFFFFFF & (scratchAddress >> 32));
|
||||
pMediaVfeState->setScratchSpaceBasePointer(lowAddress);
|
||||
pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress);
|
||||
|
||||
programAdditionalFieldsInVfeState(pMediaVfeState, hwInfo);
|
||||
|
||||
return scratchSpaceAddressOffset;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t PreambleHelper<GfxFamily>::getVFECommandsSize() {
|
||||
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
|
||||
return sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
81
shared/source/helpers/ptr_math.h
Normal file
81
shared/source/helpers/ptr_math.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
static const int ptrGarbageContent[16] = {
|
||||
0x0131, 0x133, 0xA, 0xEF,
|
||||
0x0131, 0x133, 0xA, 0xEF,
|
||||
0x0131, 0x133, 0xA, 0xEF,
|
||||
0x0131, 0x133, 0xA, 0xEF};
|
||||
static const auto ptrGarbage = (void *)ptrGarbageContent;
|
||||
|
||||
// Advances a pointer-like value by `offset` bytes and returns it as the same
// type. The arithmetic is done on the value converted to uintptr_t.
template <typename T>
inline T ptrOffset(T ptrBefore, size_t offset) {
    return (T)((uintptr_t)ptrBefore + offset);
}

// 64-bit addresses are offset directly, without the round trip through
// uintptr_t that the generic version performs.
template <>
inline uint64_t ptrOffset<uint64_t>(uint64_t ptrBefore, size_t offset) {
    return ptrBefore + offset;
}
|
||||
|
||||
// Byte distance between two pointer-like values, computed after converting
// both to uintptr_t.
template <typename TA, typename TB>
inline size_t ptrDiff(TA ptrAfter, TB ptrBefore) {
    return (uintptr_t)ptrAfter - (uintptr_t)ptrBefore;
}

// Overload for a 64-bit integer address on the left-hand side: subtracts
// directly and keeps the full 64-bit result.
template <typename T>
inline uint64_t ptrDiff(uint64_t ptrAfter, T ptrBefore) {
    const uint64_t distance = ptrAfter - ptrBefore;
    return distance;
}
|
||||
|
||||
// Converts an integer address to void *: the value is first converted to
// uintptr_t (the platform's pointer width) and then reinterpreted as a pointer.
template <typename IntegerAddressType>
inline void *addrToPtr(IntegerAddressType addr) {
    return reinterpret_cast<void *>(static_cast<uintptr_t>(addr));
}
|
||||
|
||||
// Patch policy: overwrite the target with the new value.
struct PatchStoreOperation {
    template <typename T>
    void operator()(T *memory, T value) {
        *memory = value;
    }
};

// Patch policy: add the value to whatever the target currently holds.
struct PatchIncrementOperation {
    template <typename T>
    void operator()(T *memory, T value) {
        *memory += value;
    }
};

// Applies PatchOperationT to the memory at `memoryToBePatched`.
// A patchSize equal to sizeof(uint64_t) patches a full 64-bit value; every
// other patchSize patches a 32-bit value using the truncated patchValue.
template <typename PatchOperationT = PatchStoreOperation>
inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) {
    PatchOperationT applyPatch{};

    if (patchSize != sizeof(uint64_t)) {
        auto target32 = reinterpret_cast<uint32_t *>(memoryToBePatched);
        applyPatch(target32, static_cast<uint32_t>(patchValue));
        return;
    }

    auto target64 = reinterpret_cast<uint64_t *>(memoryToBePatched);
    applyPatch(target64, patchValue);
}

// Convenience wrapper: patches by incrementing instead of storing.
inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) {
    patchWithRequiredSize<PatchIncrementOperation>(memoryToBePatched, patchSize, patchIncrementValue);
}
|
||||
|
||||
// Returns the numeric value of a pointer widened to 64 bits.
inline uint64_t castToUint64(const void *address) {
    const auto numericAddress = reinterpret_cast<uintptr_t>(address);
    return static_cast<uint64_t>(numericAddress);
}
|
||||
74
shared/source/helpers/register_offsets.h
Normal file
74
shared/source/helpers/register_offsets.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
constexpr uint32_t L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000;
|
||||
constexpr uint32_t L3SQC_REG4 = 0xB118;
|
||||
|
||||
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER = 0xFFFFFFFF;
|
||||
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER = 0x00000000;
|
||||
|
||||
//Threads Dimension X/Y/Z
|
||||
constexpr uint32_t GPUGPU_DISPATCHDIMX = 0x2500;
|
||||
constexpr uint32_t GPUGPU_DISPATCHDIMY = 0x2504;
|
||||
constexpr uint32_t GPUGPU_DISPATCHDIMZ = 0x2508;
|
||||
|
||||
constexpr uint32_t CS_GPR_R0 = 0x2600;
|
||||
constexpr uint32_t CS_GPR_R1 = 0x2608;
|
||||
constexpr uint32_t CS_GPR_R2 = 0x2610;
|
||||
constexpr uint32_t CS_GPR_R3 = 0x2618;
|
||||
constexpr uint32_t CS_GPR_R4 = 0x2620;
|
||||
constexpr uint32_t CS_GPR_R5 = 0x2628;
|
||||
constexpr uint32_t CS_GPR_R6 = 0x2630;
|
||||
constexpr uint32_t CS_GPR_R7 = 0x2638;
|
||||
constexpr uint32_t CS_GPR_R8 = 0x2640;
|
||||
constexpr uint32_t CS_GPR_R9 = 0x2648;
|
||||
constexpr uint32_t CS_GPR_R10 = 0x2650;
|
||||
constexpr uint32_t CS_GPR_R11 = 0x2658;
|
||||
constexpr uint32_t CS_GPR_R12 = 0x2660;
|
||||
constexpr uint32_t CS_GPR_R13 = 0x2668;
|
||||
constexpr uint32_t CS_GPR_R14 = 0x2670;
|
||||
constexpr uint32_t CS_GPR_R15 = 0x2678;
|
||||
|
||||
constexpr uint32_t CS_PREDICATE_RESULT = 0x2418;
|
||||
|
||||
//Alu opcodes
|
||||
constexpr uint32_t NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4;
|
||||
|
||||
constexpr uint32_t ALU_OPCODE_LOAD = 0x080;
|
||||
constexpr uint32_t ALU_OPCODE_STORE = 0x180;
|
||||
constexpr uint32_t ALU_OPCODE_ADD = 0x100;
|
||||
constexpr uint32_t ALU_OPCODE_SUB = 0x101;
|
||||
constexpr uint32_t ALU_OPCODE_AND = 0x102;
|
||||
constexpr uint32_t ALU_OPCODE_OR = 0x103;
|
||||
|
||||
constexpr uint32_t ALU_REGISTER_R_0 = 0x0;
|
||||
constexpr uint32_t ALU_REGISTER_R_1 = 0x1;
|
||||
constexpr uint32_t ALU_REGISTER_R_2 = 0x2;
|
||||
constexpr uint32_t ALU_REGISTER_R_3 = 0x3;
|
||||
constexpr uint32_t ALU_REGISTER_R_4 = 0x4;
|
||||
constexpr uint32_t ALU_REGISTER_R_5 = 0x5;
|
||||
constexpr uint32_t ALU_REGISTER_R_6 = 0x6;
|
||||
constexpr uint32_t ALU_REGISTER_R_7 = 0x7;
|
||||
constexpr uint32_t ALU_REGISTER_R_8 = 0x8;
|
||||
constexpr uint32_t ALU_REGISTER_R_9 = 0x9;
|
||||
constexpr uint32_t ALU_REGISTER_R_10 = 0xA;
|
||||
constexpr uint32_t ALU_REGISTER_R_11 = 0xB;
|
||||
constexpr uint32_t ALU_REGISTER_R_12 = 0xC;
|
||||
constexpr uint32_t ALU_REGISTER_R_13 = 0xD;
|
||||
constexpr uint32_t ALU_REGISTER_R_14 = 0xE;
|
||||
constexpr uint32_t ALU_REGISTER_R_15 = 0xF;
|
||||
|
||||
constexpr uint32_t ALU_REGISTER_R_SRCA = 0x20;
|
||||
constexpr uint32_t ALU_REGISTER_R_SRCB = 0x21;
|
||||
constexpr uint32_t ALU_REGISTER_R_ACCU = 0x31;
|
||||
constexpr uint32_t ALU_REGISTER_R_ZF = 0x32;
|
||||
constexpr uint32_t ALU_REGISTER_R_CF = 0x33;
|
||||
|
||||
constexpr uint32_t GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW = 0x23A8;
|
||||
49
shared/source/helpers/registered_method_dispatcher.h
Normal file
49
shared/source/helpers/registered_method_dispatcher.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Holds two optional callables: a "method" and a companion that estimates the
// size of the commands the method would produce. Both start out empty; calling
// through an empty slot is a no-op (or yields 0 for the size estimate).
//
// MethodArgsT / EstimateMethodArgsT are the function signatures used to
// instantiate the std::function slots.
template <typename MethodArgsT, typename EstimateMethodArgsT>
class RegisteredMethodDispatcher {
  public:
    using CommandsSizeEstimationMethodT = std::function<EstimateMethodArgsT>;
    using RegisteredMethodT = std::function<MethodArgsT>;

    // Installs (or replaces) the method. The parameter is already a private
    // copy, so it is moved into the member instead of being copied again.
    void registerMethod(RegisteredMethodT method) {
        this->method = std::move(method);
    }

    // Installs (or replaces) the size-estimation callable; moved for the same
    // reason as in registerMethod.
    void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) {
        this->commandsEstimationMethod = std::move(method);
    }

    // Invokes the registered method with the forwarded arguments; does nothing
    // when no method has been registered.
    template <typename... Args>
    void operator()(Args &&... args) const {
        if (method) {
            method(std::forward<Args>(args)...);
        }
    }

    // Returns the registered estimator's result for the forwarded arguments,
    // or 0 when no estimator has been registered.
    template <typename... Args>
    size_t estimateCommandsSize(Args &&... args) const {
        if (commandsEstimationMethod) {
            return commandsEstimationMethod(std::forward<Args>(args)...);
        }
        return 0;
    }

  protected:
    CommandsSizeEstimationMethodT commandsEstimationMethod;
    RegisteredMethodT method;
};
|
||||
|
||||
} // namespace NEO
|
||||
14
shared/source/helpers/simd_helper.h
Normal file
14
shared/source/helpers/simd_helper.h
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
// Maps a SIMD width to the walker's SIMD_SIZE enumerator by shifting the width
// right by 4 bits. A width of 1 is treated as 32 before the shift.
template <typename WALKER_TYPE>
constexpr typename WALKER_TYPE::SIMD_SIZE getSimdConfig(uint32_t simdSize) {
    return static_cast<typename WALKER_TYPE::SIMD_SIZE>(((simdSize == 1) ? 32u : simdSize) >> 4);
}
|
||||
46
shared/source/helpers/state_base_address.h
Normal file
46
shared/source/helpers/state_base_address.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class GmmHelper;
|
||||
class IndirectHeap;
|
||||
class LinearStream;
|
||||
struct DispatchFlags;
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct StateBaseAddressHelper {
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
|
||||
static void programStateBaseAddress(
|
||||
LinearStream &commandStream,
|
||||
const IndirectHeap *dsh,
|
||||
const IndirectHeap *ioh,
|
||||
const IndirectHeap *ssh,
|
||||
uint64_t generalStateBase,
|
||||
bool setGeneralStateBaseAddress,
|
||||
uint32_t statelessMocsIndex,
|
||||
uint64_t internalHeapBase,
|
||||
bool setInstructionStateBaseAddress,
|
||||
GmmHelper *gmmHelper,
|
||||
bool isMultiOsContextCapable);
|
||||
|
||||
static void appendStateBaseAddressParameters(
|
||||
STATE_BASE_ADDRESS *stateBaseAddress,
|
||||
const IndirectHeap *ssh,
|
||||
bool setGeneralStateBaseAddress,
|
||||
uint64_t internalHeapBase,
|
||||
GmmHelper *gmmHelper,
|
||||
bool isMultiOsContextCapable);
|
||||
|
||||
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);
|
||||
};
|
||||
} // namespace NEO
|
||||
79
shared/source/helpers/state_base_address_base.inl
Normal file
79
shared/source/helpers/state_base_address_base.inl
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gmm_helper/gmm_helper.h"
|
||||
#include "helpers/cache_policy.h"
|
||||
#include "helpers/hw_cmds.h"
|
||||
#include "helpers/state_base_address.h"
|
||||
#include "indirect_heap/indirect_heap.h"
|
||||
#include "memory_manager/memory_constants.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
LinearStream &commandStream,
|
||||
const IndirectHeap *dsh,
|
||||
const IndirectHeap *ioh,
|
||||
const IndirectHeap *ssh,
|
||||
uint64_t generalStateBase,
|
||||
bool setGeneralStateBaseAddress,
|
||||
uint32_t statelessMocsIndex,
|
||||
uint64_t internalHeapBase,
|
||||
bool setInstructionStateBaseAddress,
|
||||
GmmHelper *gmmHelper,
|
||||
bool isMultiOsContextCapable) {
|
||||
|
||||
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
*pCmd = GfxFamily::cmdInitStateBaseAddress;
|
||||
|
||||
if (dsh) {
|
||||
pCmd->setDynamicStateBaseAddressModifyEnable(true);
|
||||
pCmd->setDynamicStateBufferSizeModifyEnable(true);
|
||||
pCmd->setDynamicStateBaseAddress(dsh->getHeapGpuBase());
|
||||
pCmd->setDynamicStateBufferSize(dsh->getHeapSizeInPages());
|
||||
}
|
||||
|
||||
if (ioh) {
|
||||
pCmd->setIndirectObjectBaseAddressModifyEnable(true);
|
||||
pCmd->setIndirectObjectBufferSizeModifyEnable(true);
|
||||
pCmd->setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
|
||||
pCmd->setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
|
||||
}
|
||||
|
||||
if (ssh) {
|
||||
pCmd->setSurfaceStateBaseAddressModifyEnable(true);
|
||||
pCmd->setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
|
||||
}
|
||||
|
||||
if (setInstructionStateBaseAddress) {
|
||||
pCmd->setInstructionBaseAddressModifyEnable(true);
|
||||
pCmd->setInstructionBaseAddress(internalHeapBase);
|
||||
pCmd->setInstructionBufferSizeModifyEnable(true);
|
||||
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||
pCmd->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
|
||||
}
|
||||
|
||||
if (setGeneralStateBaseAddress) {
|
||||
pCmd->setGeneralStateBaseAddressModifyEnable(true);
|
||||
pCmd->setGeneralStateBufferSizeModifyEnable(true);
|
||||
// GSH must be set to 0 for stateless
|
||||
pCmd->setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
|
||||
pCmd->setGeneralStateBufferSize(0xfffff);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.OverrideStatelessMocsIndex.get() != -1) {
|
||||
statelessMocsIndex = DebugManager.flags.OverrideStatelessMocsIndex.get();
|
||||
}
|
||||
|
||||
statelessMocsIndex = statelessMocsIndex << 1;
|
||||
|
||||
pCmd->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
|
||||
|
||||
appendStateBaseAddressParameters(pCmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
26
shared/source/helpers/state_base_address_bdw_plus.inl
Normal file
26
shared/source/helpers/state_base_address_bdw_plus.inl
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/state_base_address_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||
STATE_BASE_ADDRESS *stateBaseAddress,
|
||||
const IndirectHeap *ssh,
|
||||
bool setGeneralStateBaseAddress,
|
||||
uint64_t internalHeapBase,
|
||||
GmmHelper *gmmHelper,
|
||||
bool isMultiOsContextCapable) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
17
shared/source/helpers/state_compute_mode_helper.h
Normal file
17
shared/source/helpers/state_compute_mode_helper.h
Normal file
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "command_stream/csr_definitions.h"
|
||||
#include "helpers/hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
struct StateComputeModeHelper {
|
||||
static bool isStateComputeModeRequired(CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed);
|
||||
};
|
||||
} // namespace NEO
|
||||
31
shared/source/helpers/stdio.h
Normal file
31
shared/source/helpers/stdio.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#ifndef _WIN32
|
||||
#ifndef __STDC_LIB_EXT1__
|
||||
#if __STDC_WANT_LIB_EXT1__ != 1
|
||||
|
||||
#include <cstdio>
|
||||
#include <errno.h>
|
||||
|
||||
// Stand-in for fopen_s on platforms that do not provide it.
// Returns 0 on success, -EINVAL when any argument is null, and otherwise the
// negated errno left behind by fopen (in which case *pFile is set to nullptr).
inline int fopen_s(FILE **pFile, const char *filename, const char *mode) {
    const bool allArgumentsPresent = pFile && filename && mode;
    if (!allArgumentsPresent) {
        return -EINVAL;
    }

    FILE *opened = fopen(filename, mode);
    *pFile = opened;

    return opened ? 0 : -errno;
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
106
shared/source/helpers/string.h
Normal file
106
shared/source/helpers/string.h
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
|
||||
// Bounds-checked string copy.
// Returns 0 on success, -EINVAL for a null dst/src and -ERANGE when src plus
// its terminator does not fit in dstSize bytes (dst is left untouched then).
inline int strcpy_s(char *dst, size_t dstSize, const char *src) {
    if (!dst || !src) {
        return -EINVAL;
    }

    const size_t srcLength = strlen(src);
    if (srcLength >= dstSize) {
        return -ERANGE;
    }

    // src[srcLength] is the terminator, so copying one extra byte also terminates dst.
    memcpy(dst, src, srcLength + 1);
    return 0;
}
|
||||
|
||||
// Bounds-checked, always-terminating counterpart of strncpy.
//
// Copies at most `count` characters of `src` into `dst`, which holds
// `numberOfElements` bytes, and always writes a terminating '\0' inside the
// destination buffer. If the characters to copy would leave no room for the
// terminator, the copy is truncated to numberOfElements - 1 characters.
//
// Returns 0 on success, -EINVAL when dst or src is null, and -ERANGE when the
// destination has no room at all (numberOfElements == 0) or is smaller than
// `count`.
inline int strncpy_s(char *dst, size_t numberOfElements, const char *src, size_t count) {
    if ((dst == nullptr) || (src == nullptr)) {
        return -EINVAL;
    }
    // A zero-sized destination cannot even hold the terminator.
    if ((numberOfElements == 0) || (numberOfElements < count)) {
        return -ERANGE;
    }

    // Bounded scan: never read src beyond the `count` characters we may copy.
    const void *terminator = memchr(src, '\0', count);
    size_t length = (terminator != nullptr)
                        ? static_cast<size_t>(static_cast<const char *>(terminator) - src)
                        : count;

    // Keep one byte for the terminator so the write below stays in bounds.
    if (length >= numberOfElements) {
        length = numberOfElements - 1;
    }

    memcpy(dst, src, length);
    dst[length] = '\0';

    return 0;
}
|
||||
|
||||
// Length of str, looking at no more than count characters.
// Returns 0 for a null str and count when no terminator is found in range.
inline size_t strnlen_s(const char *str, size_t count) {
    if (!str) {
        return 0;
    }

    size_t length = 0;
    while ((length < count) && (str[length] != '\0')) {
        ++length;
    }
    return length;
}
|
||||
|
||||
// Bounds-checked memcpy.
// Returns 0 on success, -EINVAL for a null dst/src and -ERANGE when count
// exceeds destSize; nothing is copied on failure.
inline int memcpy_s(void *dst, size_t destSize, const void *src, size_t count) {
    if (!dst || !src) {
        return -EINVAL;
    }
    if (count > destSize) {
        return -ERANGE;
    }

    memcpy(dst, src, count);
    return 0;
}
|
||||
|
||||
// Bounds-checked memmove (source and destination may overlap).
// Returns 0 on success, -EINVAL for a null dst/src and -ERANGE when count
// exceeds numberOfElements; nothing is moved on failure.
inline int memmove_s(void *dst, size_t numberOfElements, const void *src, size_t count) {
    if (!dst || !src) {
        return -EINVAL;
    }
    if (count > numberOfElements) {
        return -ERANGE;
    }

    memmove(dst, src, count);
    return 0;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Allocates a new T[] array and fills it from src via memcpy_s.
// Returns an empty unique_ptr when size is 0.
// NOTE(review): `size` is used both as the element count of the allocation and
// as the byte count of the copy, so for element types wider than one byte only
// the first `size` bytes are initialised - confirm this is intended.
template <typename T = char>
inline std::unique_ptr<T[]> makeCopy(const void *src, size_t size) {
    using ElT = typename std::remove_all_extents<T>::type;

    std::unique_ptr<T[]> copiedData;
    if (size != 0) {
        copiedData.reset(new ElT[size]);
        memcpy_s(copiedData.get(), size, src, size);
    }
    return copiedData;
}
|
||||
254
shared/source/helpers/surface_format_info.h
Normal file
254
shared/source/helpers/surface_format_info.h
Normal file
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "gmm_helper/gmm_lib.h"
|
||||
|
||||
namespace NEO {
|
||||
enum GFX3DSTATE_SURFACEFORMAT : unsigned short {
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT = 0x000,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT = 0x001,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT = 0x002,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UNORM = 0x003,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SNORM = 0x004,
|
||||
GFX3DSTATE_SURFACEFORMAT_R64G64_FLOAT = 0x005,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32X32_FLOAT = 0x006,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SSCALED = 0x007,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_USCALED = 0x008,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_FLOAT = 0x040,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SINT = 0x041,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UINT = 0x042,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UNORM = 0x043,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SNORM = 0x044,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SSCALED = 0x045,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32B32_USCALED = 0x046,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM = 0x080,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM = 0x081,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT = 0x082,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT = 0x083,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT = 0x084,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT = 0x085,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_SINT = 0x086,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_UINT = 0x087,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS = 0x088,
|
||||
GFX3DSTATE_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT = 0x089,
|
||||
GFX3DSTATE_SURFACEFORMAT_L32A32_FLOAT = 0x08A,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_UNORM = 0x08B,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_SNORM = 0x08C,
|
||||
GFX3DSTATE_SURFACEFORMAT_R64_FLOAT = 0x08D,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_UNORM = 0x08E,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_FLOAT = 0x08F,
|
||||
GFX3DSTATE_SURFACEFORMAT_A32X32_FLOAT = 0x090,
|
||||
GFX3DSTATE_SURFACEFORMAT_L32X32_FLOAT = 0x091,
|
||||
GFX3DSTATE_SURFACEFORMAT_I32X32_FLOAT = 0x092,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SSCALED = 0x093,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_USCALED = 0x094,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_SSCALED = 0x095,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32G32_USCALED = 0x096,
|
||||
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM = 0x0C0,
|
||||
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB = 0x0C1,
|
||||
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM = 0x0C2,
|
||||
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB = 0x0C3,
|
||||
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UINT = 0x0C4,
|
||||
GFX3DSTATE_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM = 0x0C5,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM = 0x0C7,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB = 0x0C8,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM = 0x0C9,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT = 0x0CA,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT = 0x0CB,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM = 0x0CC,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM = 0x0CD,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_SINT = 0x0CE,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_UINT = 0x0CF,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT = 0x0D0,
|
||||
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM = 0x0D1,
|
||||
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB = 0x0D2,
|
||||
GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT = 0x0D3,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_SINT = 0x0D6,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_UINT = 0x0D7,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT = 0x0D8,
|
||||
GFX3DSTATE_SURFACEFORMAT_R24_UNORM_X8_TYPELESS = 0x0D9,
|
||||
GFX3DSTATE_SURFACEFORMAT_X24_TYPELESS_G8_UINT = 0x0DA,
|
||||
GFX3DSTATE_SURFACEFORMAT_L16A16_UNORM = 0x0DF,
|
||||
GFX3DSTATE_SURFACEFORMAT_I24X8_UNORM = 0x0E0,
|
||||
GFX3DSTATE_SURFACEFORMAT_L24X8_UNORM = 0x0E1,
|
||||
GFX3DSTATE_SURFACEFORMAT_A24X8_UNORM = 0x0E2,
|
||||
GFX3DSTATE_SURFACEFORMAT_I32_FLOAT = 0x0E3,
|
||||
GFX3DSTATE_SURFACEFORMAT_L32_FLOAT = 0x0E4,
|
||||
GFX3DSTATE_SURFACEFORMAT_A32_FLOAT = 0x0E5,
|
||||
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM = 0x0E9,
|
||||
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB = 0x0EA,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM = 0x0EB,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB = 0x0EC,
|
||||
GFX3DSTATE_SURFACEFORMAT_R9G9B9E5_SHAREDEXP = 0x0ED,
|
||||
GFX3DSTATE_SURFACEFORMAT_B10G10R10X2_UNORM = 0x0EE,
|
||||
GFX3DSTATE_SURFACEFORMAT_L16A16_FLOAT = 0x0F0,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_UNORM = 0x0F1,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_SNORM = 0x0F2,
|
||||
GFX3DSTATE_SURFACEFORMAT_R10G10B10X2_USCALED = 0x0F3,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SSCALED = 0x0F4,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_USCALED = 0x0F5,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_SSCALED = 0x0F6,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16_USCALED = 0x0F7,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_SSCALED = 0x0F8,
|
||||
GFX3DSTATE_SURFACEFORMAT_R32_USCALED = 0x0F9,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM = 0x100,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM_SRGB = 0x101,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM = 0x102,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB = 0x103,
|
||||
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM = 0x104,
|
||||
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB = 0x105,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM = 0x106,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM = 0x107,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_SINT = 0x108,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_UINT = 0x109,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_UNORM = 0x10A,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_SNORM = 0x10B,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_SINT = 0x10C,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_UINT = 0x10D,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_FLOAT = 0x10E,
|
||||
GFX3DSTATE_SURFACEFORMAT_I16_UNORM = 0x111,
|
||||
GFX3DSTATE_SURFACEFORMAT_L16_UNORM = 0x112,
|
||||
GFX3DSTATE_SURFACEFORMAT_A16_UNORM = 0x113,
|
||||
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM = 0x114,
|
||||
GFX3DSTATE_SURFACEFORMAT_I16_FLOAT = 0x115,
|
||||
GFX3DSTATE_SURFACEFORMAT_L16_FLOAT = 0x116,
|
||||
GFX3DSTATE_SURFACEFORMAT_A16_FLOAT = 0x117,
|
||||
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM_SRGB = 0x118,
|
||||
GFX3DSTATE_SURFACEFORMAT_R5G5_SNORM_B6_UNORM = 0x119,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM = 0x11A,
|
||||
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB = 0x11B,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_SSCALED = 0x11C,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8_USCALED = 0x11D,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_SSCALED = 0x11E,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16_USCALED = 0x11F,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_UNORM = 0x140,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_SNORM = 0x141,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_SINT = 0x142,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_UINT = 0x143,
|
||||
GFX3DSTATE_SURFACEFORMAT_A8_UNORM = 0x144,
|
||||
GFX3DSTATE_SURFACEFORMAT_I8_UNORM = 0x145,
|
||||
GFX3DSTATE_SURFACEFORMAT_L8_UNORM = 0x146,
|
||||
GFX3DSTATE_SURFACEFORMAT_P4A4_UNORM = 0x147,
|
||||
GFX3DSTATE_SURFACEFORMAT_A4P4_UNORM = 0x148,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_SSCALED = 0x149,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8_USCALED = 0x14A,
|
||||
GFX3DSTATE_SURFACEFORMAT_P8_UNORM = 0x14B,
|
||||
GFX3DSTATE_SURFACEFORMAT_L8_UNORM_SRGB = 0x14C,
|
||||
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB_SRGB = 0x180,
|
||||
GFX3DSTATE_SURFACEFORMAT_R1_UINT = 0x181,
|
||||
GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL = 0x182,
|
||||
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY = 0x183,
|
||||
GFX3DSTATE_SURFACEFORMAT_P2_UNORM = 0x184,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM = 0x186,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM = 0x187,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM = 0x188,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC4_UNORM = 0x189,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC5_UNORM = 0x18A,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM_SRGB = 0x18B,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM_SRGB = 0x18C,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM_SRGB = 0x18D,
|
||||
GFX3DSTATE_SURFACEFORMAT_MONO8 = 0x18E,
|
||||
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV = 0x18F,
|
||||
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY = 0x190,
|
||||
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB = 0x191,
|
||||
GFX3DSTATE_SURFACEFORMAT_FXT1 = 0x192,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8_UNORM = 0x193,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SNORM = 0x194,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SSCALED = 0x195,
|
||||
GFX3DSTATE_SURFACEFORMAT_R8G8B8_USCALED = 0x196,
|
||||
GFX3DSTATE_SURFACEFORMAT_R64G64B64A64_FLOAT = 0x197,
|
||||
GFX3DSTATE_SURFACEFORMAT_R64G64B64_FLOAT = 0x198,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC4_SNORM = 0x199,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC5_SNORM = 0x19A,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16_FLOAT = 0x19B,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16_UNORM = 0x19C,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SNORM = 0x19D,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SSCALED = 0x19E,
|
||||
GFX3DSTATE_SURFACEFORMAT_R16G16B16_USCALED = 0x19F,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC6H_SF16 = 0x1A1,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM = 0x1A2,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM_SRGB = 0x1A3,
|
||||
GFX3DSTATE_SURFACEFORMAT_BC6H_UF16 = 0x1A4,
|
||||
GFX3DSTATE_SURFACEFORMAT_NV12 = 0x1A5,
|
||||
GFX3DSTATE_SURFACEFORMAT_RAW = 0x1FF,
|
||||
NUM_GFX3DSTATE_SURFACEFORMATS
|
||||
};
|
||||
|
||||
enum class ImagePlane {
|
||||
NO_PLANE = 0,
|
||||
PLANE_Y,
|
||||
PLANE_U,
|
||||
PLANE_V,
|
||||
PLANE_UV
|
||||
};
|
||||
|
||||
struct SurfaceFormatInfo {
|
||||
GMM_RESOURCE_FORMAT GMMSurfaceFormat;
|
||||
GFX3DSTATE_SURFACEFORMAT GenxSurfaceFormat;
|
||||
uint32_t GMMTileWalk;
|
||||
uint32_t NumChannels;
|
||||
uint32_t PerChannelSizeInBytes;
|
||||
size_t ImageElementSizeInBytes;
|
||||
};
|
||||
|
||||
enum class ImageType {
|
||||
Invalid,
|
||||
Image1D,
|
||||
Image2D,
|
||||
Image3D,
|
||||
Image1DArray,
|
||||
Image2DArray,
|
||||
Image1DBuffer
|
||||
};
|
||||
|
||||
struct ImageDescriptor {
|
||||
ImageType imageType;
|
||||
size_t imageWidth;
|
||||
size_t imageHeight;
|
||||
size_t imageDepth;
|
||||
size_t imageArraySize;
|
||||
size_t imageRowPitch;
|
||||
size_t imageSlicePitch;
|
||||
uint32_t numMipLevels;
|
||||
uint32_t numSamples;
|
||||
bool fromParent;
|
||||
};
|
||||
|
||||
struct ImageInfo {
|
||||
ImageDescriptor imgDesc;
|
||||
const SurfaceFormatInfo *surfaceFormat;
|
||||
size_t size;
|
||||
size_t rowPitch;
|
||||
size_t slicePitch;
|
||||
uint32_t qPitch;
|
||||
size_t offset;
|
||||
uint32_t xOffset;
|
||||
uint32_t yOffset;
|
||||
uint32_t yOffsetForUVPlane;
|
||||
GMM_YUV_PLANE_ENUM plane;
|
||||
uint32_t baseMipLevel;
|
||||
uint32_t mipCount;
|
||||
bool linearStorage;
|
||||
bool preferRenderCompression;
|
||||
bool useLocalMemory;
|
||||
};
|
||||
|
||||
struct McsSurfaceInfo {
|
||||
uint32_t pitch;
|
||||
uint32_t qPitch;
|
||||
uint32_t multisampleCount;
|
||||
};
|
||||
|
||||
struct SurfaceOffsets {
|
||||
uint64_t offset;
|
||||
uint32_t xOffset;
|
||||
uint32_t yOffset;
|
||||
uint32_t yOffsetForUVplane;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
66
shared/source/helpers/timestamp_packet.cpp
Normal file
66
shared/source/helpers/timestamp_packet.cpp
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "helpers/timestamp_packet.h"
|
||||
|
||||
#include "command_stream/command_stream_receiver.h"
|
||||
#include "command_stream/linear_stream.h"
|
||||
#include "utilities/tag_allocator.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Appends the given node pointer to the end of this container's node list.
void TimestampPacketContainer::add(Node *timestampPacketNode) {
    timestampPacketNodes.emplace_back(timestampPacketNode);
}
|
||||
|
||||
// Calls returnTag() on every node still held by the container.
TimestampPacketContainer::~TimestampPacketContainer() {
    for (auto nodeIt = timestampPacketNodes.begin(); nodeIt != timestampPacketNodes.end(); ++nodeIt) {
        (*nodeIt)->returnTag();
    }
}
|
||||
|
||||
void TimestampPacketContainer::swapNodes(TimestampPacketContainer ×tampPacketContainer) {
|
||||
timestampPacketNodes.swap(timestampPacketContainer.timestampPacketNodes);
|
||||
}
|
||||
|
||||
void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
|
||||
std::vector<Node *> pendingNodes;
|
||||
|
||||
for (auto node : timestampPacketNodes) {
|
||||
if (node->canBeReleased() || clearAllDependencies) {
|
||||
node->returnTag();
|
||||
} else {
|
||||
pendingNodes.push_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
std::swap(timestampPacketNodes, pendingNodes);
|
||||
}
|
||||
|
||||
void TimestampPacketContainer::assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer) {
|
||||
auto &inputNodes = inputTimestampPacketContainer.peekNodes();
|
||||
std::copy(inputNodes.begin(), inputNodes.end(), std::back_inserter(timestampPacketNodes));
|
||||
|
||||
for (auto node : inputNodes) {
|
||||
node->incRefCount();
|
||||
}
|
||||
}
|
||||
|
||||
// Passes the base graphics allocation of every held node to
// commandStreamReceiver.makeResident().
void TimestampPacketContainer::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    for (auto nodeIt = timestampPacketNodes.begin(); nodeIt != timestampPacketNodes.end(); ++nodeIt) {
        commandStreamReceiver.makeResident(*(*nodeIt)->getBaseGraphicsAllocation());
    }
}
|
||||
|
||||
bool TimestampPacketContainer::isCompleted() const {
|
||||
for (auto node : timestampPacketNodes) {
|
||||
if (!node->tagForCpuAccess->isCompleted()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
179
shared/source/helpers/timestamp_packet.h
Normal file
179
shared/source/helpers/timestamp_packet.h
Normal file
@@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "command_container/command_encoder.h"
|
||||
#include "command_stream/csr_deps.h"
|
||||
#include "helpers/aux_translation.h"
|
||||
#include "helpers/non_copyable_or_moveable.h"
|
||||
#include "utilities/tag_allocator.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class CommandStreamReceiver;
|
||||
class LinearStream;
|
||||
|
||||
namespace TimestampPacketSizeControl {
// Number of Packet slots in TimestampPacketStorage::packets.
constexpr uint32_t preferredPacketCount = 16u;
} // namespace TimestampPacketSizeControl
|
||||
|
||||
#pragma pack(1)
|
||||
struct TimestampPacketStorage {
|
||||
struct Packet {
|
||||
uint32_t contextStart = 1u;
|
||||
uint32_t globalStart = 1u;
|
||||
uint32_t contextEnd = 1u;
|
||||
uint32_t globalEnd = 1u;
|
||||
};
|
||||
|
||||
enum class WriteOperationType : uint32_t {
|
||||
BeforeWalker,
|
||||
AfterWalker
|
||||
};
|
||||
|
||||
static GraphicsAllocation::AllocationType getAllocationType() {
|
||||
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
|
||||
}
|
||||
|
||||
bool isCompleted() const {
|
||||
for (uint32_t i = 0; i < packetsUsed; i++) {
|
||||
if ((packets[i].contextEnd & 1) || (packets[i].globalEnd & 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return implicitDependenciesCount.load() == 0;
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
for (auto &packet : packets) {
|
||||
packet.contextStart = 1u;
|
||||
packet.globalStart = 1u;
|
||||
packet.contextEnd = 1u;
|
||||
packet.globalEnd = 1u;
|
||||
}
|
||||
implicitDependenciesCount.store(0);
|
||||
packetsUsed = 1;
|
||||
}
|
||||
|
||||
void incImplicitDependenciesCount() { implicitDependenciesCount++; }
|
||||
|
||||
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
|
||||
std::atomic<uint32_t> implicitDependenciesCount{0u};
|
||||
uint32_t packetsUsed = 1;
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPacketStorage),
|
||||
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
|
||||
|
||||
class TimestampPacketContainer : public NonCopyableClass {
|
||||
public:
|
||||
using Node = TagNode<TimestampPacketStorage>;
|
||||
TimestampPacketContainer() = default;
|
||||
TimestampPacketContainer(TimestampPacketContainer &&) = default;
|
||||
TimestampPacketContainer &operator=(TimestampPacketContainer &&) = default;
|
||||
MOCKABLE_VIRTUAL ~TimestampPacketContainer();
|
||||
|
||||
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
|
||||
void add(Node *timestampPacketNode);
|
||||
void swapNodes(TimestampPacketContainer ×tampPacketContainer);
|
||||
void assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer);
|
||||
void resolveDependencies(bool clearAllDependencies);
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
bool isCompleted() const;
|
||||
|
||||
protected:
|
||||
std::vector<Node *> timestampPacketNodes;
|
||||
};
|
||||
|
||||
struct TimestampPacketDependencies : public NonCopyableClass {
|
||||
TimestampPacketContainer previousEnqueueNodes;
|
||||
TimestampPacketContainer barrierNodes;
|
||||
TimestampPacketContainer auxToNonAuxNodes;
|
||||
TimestampPacketContainer nonAuxToAuxNodes;
|
||||
};
|
||||
|
||||
struct TimestampPacketHelper {
|
||||
template <typename GfxFamily>
|
||||
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode<TimestampPacketStorage> ×tampPacketNode) {
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto compareAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
auto dependenciesCountAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount);
|
||||
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) {
|
||||
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
|
||||
auto miSemaphoreCmd = cmdStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
|
||||
EncodeSempahore<GfxFamily>::programMiSemaphoreWait(miSemaphoreCmd, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
}
|
||||
|
||||
timestampPacketNode.tagForCpuAccess->incImplicitDependenciesCount();
|
||||
|
||||
auto miAtomic = cmdStream.getSpaceForCmd<MI_ATOMIC>();
|
||||
EncodeAtomic<GfxFamily>::programMiAtomic(miAtomic, dependenciesCountAddress,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependencies(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
|
||||
for (auto timestampPacketContainer : csrDependencies) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
|
||||
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
|
||||
const TimestampPacketDependencies *timestampPacketDependencies) {
|
||||
auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux)
|
||||
? timestampPacketDependencies->auxToNonAuxNodes
|
||||
: timestampPacketDependencies->nonAuxToAuxNodes;
|
||||
|
||||
for (auto &node : container.peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(size_t count) {
|
||||
return count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() {
|
||||
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> ×tampPacketNode) {
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
|
||||
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies) {
|
||||
size_t totalCommandsSize = 0;
|
||||
for (auto timestampPacketContainer : csrDependencies) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
totalCommandsSize += getRequiredCmdStreamSizeForNodeDependency<GfxFamily>(*node);
|
||||
}
|
||||
}
|
||||
|
||||
return totalCommandsSize;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
62
shared/source/helpers/vec.h
Normal file
62
shared/source/helpers/vec.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Three-component value (x, y, z).
template <typename T>
struct Vec3 {
    Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
    Vec3(const Vec3 &v) : x(v.x), y(v.y), z(v.z) {}

    // Builds from a pointer to three consecutive values; a null pointer
    // yields (0, 0, 0).
    Vec3(const T *arr) {
        *this = arr;
    }

    Vec3 &operator=(const Vec3 &arr) {
        x = arr.x;
        y = arr.y;
        z = arr.z;
        return *this;
    }

    // Assigns from a pointer to three consecutive values. A null pointer
    // resets the vector to (0, 0, 0), matching the pointer constructor,
    // instead of dereferencing it.
    Vec3<T> &operator=(const T arr[3]) {
        if (arr == nullptr) {
            x = y = z = 0;
        } else {
            x = arr[0];
            y = arr[1];
            z = arr[2];
        }
        return *this;
    }

    bool operator==(const Vec3<T> &vec) const {
        return ((x == vec.x) && (y == vec.y) && (z == vec.z));
    }

    bool operator!=(const Vec3<T> &vec) const {
        return !operator==(vec);
    }

    // Highest dimension whose extent exceeds 1 (3 for z, 2 for y); otherwise
    // 1 when x is at least 1, and 0 when it is not.
    unsigned int getSimplifiedDim() const {
        if (z > 1) {
            return 3;
        }
        if (y > 1) {
            return 2;
        }
        if (x >= 1) {
            return 1;
        }
        return 0;
    }

    T x;
    T y;
    T z;
};
|
||||
13
shared/source/helpers/windows/gmm_callbacks.cpp
Normal file
13
shared/source/helpers/windows/gmm_callbacks.cpp
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) = nullptr;
|
||||
} // namespace NEO
|
||||
26
shared/source/helpers/windows/gmm_callbacks.h
Normal file
26
shared/source/helpers/windows/gmm_callbacks.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
|
||||
template <typename GfxFamily>
|
||||
struct DeviceCallbacks {
|
||||
static long __stdcall notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct TTCallbacks {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
static int __stdcall writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
26
shared/source/helpers/windows/gmm_callbacks.inl
Normal file
26
shared/source/helpers/windows/gmm_callbacks.inl
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "command_stream/linear_stream.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/windows/gmm_callbacks.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
long __stdcall DeviceCallbacks<GfxFamily>::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user