Use L3_CONTROL only for DG1

Related-To: LOCI-1877
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon
2021-02-10 15:20:50 +00:00
committed by Compute-Runtime-Automation
parent a79f67958e
commit 6f555d6258
18 changed files with 1414 additions and 13 deletions

View File

@@ -21,6 +21,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/blit_commands_helper_extra.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cache_flush.inl
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/common_types.h
@@ -66,6 +67,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/l3_range.h
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.h
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.inl

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/l3_range.h"
#include "shared/source/utilities/range.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "hw_cmds.h"
namespace NEO {
// Writes one L3_CONTROL command per entry of `ranges` into `commandStream`.
//
// Every command requests an HDC pipeline flush and uses the
// FLUSH_L3_WITH_EVICTION eviction policy for the range's masked address and mask.
// When `postSyncAddress` is non-zero, the command emitted for the LAST range
// additionally carries a WRITE_IMMEDIATE_DATA post-sync operation that targets
// `postSyncAddress` with immediate data 0.
//
// An empty `ranges` emits nothing. It is handled up front so that no iterator of
// an empty range is ever dereferenced.
//
// commandStream   - stream the commands are appended to (must not be null).
// ranges          - L3 ranges to flush; elements are assumed to be contiguous in memory.
// postSyncAddress - address for the post-sync write, or 0 for no post-sync.
// hwInfo          - used to select the HwHelper and to evaluate the A0-stepping workaround.
template <typename GfxFamily>
inline void flushGpuCache(LinearStream *commandStream, const Range<L3Range> &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo) {
    using L3_CONTROL = typename GfxFamily::L3_CONTROL;
    using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE;
    using L3_FLUSH_EVICTION_POLICY = typename L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY;

    const auto rangeCount = ranges.size();
    if (rangeCount == 0) {
        // Nothing to flush; also avoids dereferencing begin()/rbegin() of an empty range.
        return;
    }

    // Template reused for every command that does not carry a post-sync write.
    auto templ = GfxFamily::cmdInitL3ControlWithPostSync;
    templ.getBase().setHdcPipelineFlush(true);

    HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    auto isA0Stepping = hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo);

    const L3Range *const first = &*ranges.begin();
    const L3Range *const last = first + (rangeCount - 1);

    for (const L3Range *it = first; it <= last; ++it) {
        auto l3Control = commandStream->getSpaceForCmd<L3_CONTROL>();

        if ((it == last) && (postSyncAddress != 0)) {
            // Final range: start from a fresh command and attach the post-sync write.
            auto cmd = GfxFamily::cmdInitL3ControlWithPostSync;
            cmd.getBase().setHdcPipelineFlush(templ.getBase().getHdcPipelineFlush());
            cmd.getL3FlushAddressRange().setL3FlushEvictionPolicy(L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping);
            cmd.getL3FlushAddressRange().setAddress(it->getMaskedAddress(), isA0Stepping);
            cmd.getL3FlushAddressRange().setAddressMask(it->getMask(), isA0Stepping);
            cmd.getBase().setPostSyncOperation(GfxFamily::L3_CONTROL_BASE::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
            cmd.getPostSyncData().setAddress(postSyncAddress);
            cmd.getPostSyncData().setImmediateData(0);
            *l3Control = cmd;
        } else {
            templ.getL3FlushAddressRange().setAddress(it->getMaskedAddress(), isA0Stepping);
            templ.getL3FlushAddressRange().setAddressMask(it->getMask(), isA0Stepping);
            templ.getL3FlushAddressRange().setL3FlushEvictionPolicy(L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping);
            *l3Control = templ;
        }
    }
}
// Returns the number of command-stream bytes needed to flush `ranges`:
// one L3_CONTROL command per range. Requesting a post-sync (`usePostSync`)
// does not change the size, but doing so with an empty range list is
// treated as an unrecoverable error.
template <typename GfxFamily>
inline size_t getSizeNeededToFlushGpuCache(const Range<L3Range> &ranges, bool usePostSync) {
    UNRECOVERABLE_IF(usePostSync && (ranges.size() == 0));
    return ranges.size() * sizeof(typename GfxFamily::L3_CONTROL);
}
} // namespace NEO

View File

@@ -0,0 +1,138 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/utilities/stackvec.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
namespace NEO {
// Limit on the number of flush sub-ranges. Not referenced in this header;
// presumably enforced by callers that split a flush into L3Range entries — TODO confirm.
static const size_t maxFlushSubrangeCount = 126;
// Compact 64-bit description of a power-of-two sized, page-granular address range.
//
// The three fields are packed into one uint64_t bitfield (see Data below):
//   mask    - minAlignmentBitOffset bits; the range spans 2^(minAlignmentBitOffset + mask) bytes,
//   address - the byte address shifted right by minAlignmentBitOffset,
//   policy  - policySize bits, stored and returned as-is by this type.
struct L3Range {
    // Smallest addressable granule: one memory page.
    static constexpr uint64_t minAlignment = MemoryConstants::pageSize;
    static constexpr uint64_t minAlignmentMask = minAlignment - 1ULL;
    // Math::ffs of the granule; used as the shift between byte addresses and the stored address field.
    static constexpr uint64_t minAlignmentBitOffset = Math::ffs(minAlignment);
    // Largest size a single range may describe.
    static constexpr uint64_t maxSingleRange = 4 * MemoryConstants::gigaByte;
    // Mask value that corresponds to maxSingleRange.
    static constexpr uint64_t maxMaskValue = Math::ffs(maxSingleRange / minAlignment);
    // Width, in bits, of the policy field.
    static const uint64_t policySize = 2;

    // Leaves the packed data uninitialized; use the from*() factories to get a fully defined value.
    L3Range() = default;

    uint64_t getMask() const {
        return data.common.mask;
    }

    void setMask(uint64_t mask) {
        data.common.mask = mask;
    }

    // Returns the stored address converted back to a byte address.
    uint64_t getAddress() const {
        return data.common.address << L3Range::minAlignmentBitOffset;
    }

    // Stores `address` with its low minAlignmentBitOffset bits dropped;
    // bits that do not fit the address bitfield are truncated as well.
    void setAddress(uint64_t address) {
        data.common.address = address >> L3Range::minAlignmentBitOffset;
    }

    void setPolicy(uint64_t policy) {
        data.common.policy = policy;
    }

    uint64_t getPolicy() const {
        return data.common.policy;
    }

    // True when `v` is a multiple of minAlignment.
    static constexpr bool meetsMinimumAlignment(uint64_t v) {
        return (0 == (v & minAlignmentMask));
    }

    // Converts a range size in bytes to the mask encoding.
    // `size` must be a power of two within [minAlignment, maxSingleRange];
    // any other value is treated as unrecoverable.
    static uint32_t getMaskFromSize(uint64_t size) {
        UNRECOVERABLE_IF(false == Math::isPow2(size));
        UNRECOVERABLE_IF((size < minAlignment) || (size > maxSingleRange));
        auto ret = Math::ffs(size >> minAlignmentBitOffset);
        static_assert(maxMaskValue < std::numeric_limits<uint32_t>::max(), "");
        return static_cast<uint32_t>(ret);
    }

    // Size in bytes described by the current mask.
    uint64_t getSizeInBytes() const {
        return (1ULL << (minAlignmentBitOffset + getMask()));
    }

    // Address with the bits covered by the range size masked off
    // (assumes maxNBitValue(n) yields a value with the n lowest bits set — TODO confirm).
    uint64_t getMaskedAddress() const {
        return getAddress() & (~maxNBitValue(minAlignmentBitOffset + getMask()));
    }

    // Builds a range from a byte address and a power-of-two size. The policy is zero.
    static L3Range fromAddressSize(uint64_t address, uint64_t size) {
        // Value-initialize so the policy bits are zero rather than indeterminate.
        L3Range ret{};
        ret.setAddress(address);
        ret.setMask(getMaskFromSize(size));
        return ret;
    }

    // Same as fromAddressSize(), with an explicit policy value.
    static L3Range fromAddressSizeWithPolicy(uint64_t address, uint64_t size, uint64_t policy) {
        L3Range ret = fromAddressSize(address, size);
        ret.setPolicy(policy);
        return ret;
    }

    // Builds a range from a byte address and an already encoded mask. The policy is zero.
    static L3Range fromAddressMask(uint64_t address, uint64_t mask) {
        // Value-initialize so the policy bits are zero rather than indeterminate.
        L3Range ret{};
        ret.setAddress(address);
        ret.setMask(mask);
        return ret;
    }

  protected:
    // Packed representation; `raw` aliases the three bitfields as one 64-bit word.
    union Data {
        struct {
            uint64_t mask : minAlignmentBitOffset;
            uint64_t address : sizeof(uint64_t) * 8 - minAlignmentBitOffset - policySize;
            uint64_t policy : policySize;
        } common;
        uint64_t raw;
    } data;
    static_assert(sizeof(Data) == sizeof(uint64_t), "");
};
// Two ranges are equal when both their address and their mask match.
// The policy field does not take part in the comparison.
inline bool operator==(const L3Range &lhs, const L3Range &rhs) {
    if (lhs.getAddress() != rhs.getAddress()) {
        return false;
    }
    return lhs.getMask() == rhs.getMask();
}
// Negation of operator== for L3Range.
inline bool operator!=(const L3Range &lhs, const L3Range &rhs) {
    return !(lhs == rhs);
}
// Appends to `ret` a sequence of L3Range entries that together cover
// [address, address + size) exactly, each tagged with `policy`.
//
// Both `address` and `size` must satisfy L3Range::meetsMinimumAlignment();
// otherwise the call is unrecoverable. Each emitted range is the largest
// power-of-two chunk that fits the remaining length, is no larger than the
// lowest set bit of the current offset, and does not exceed
// L3Range::maxSingleRange.
template <typename ContainerT>
inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret, uint64_t policy) {
    UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(address));
    UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(size));

    const uint64_t rangeEnd = address + size;
    for (uint64_t cursor = address; cursor < rangeEnd;) {
        // Largest power of two that still fits in what is left to cover.
        const uint64_t limitByRemaining = Math::prevPowerOfTwo(rangeEnd - cursor);
        // Largest power of two the current offset is aligned to (unbounded at offset 0).
        const uint64_t limitByAlignment = cursor ? (1ULL << Math::ffs(cursor)) : L3Range::maxSingleRange;

        uint64_t chunk = std::min(limitByRemaining, limitByAlignment);
        if (chunk > L3Range::maxSingleRange) {
            chunk = L3Range::maxSingleRange;
        }

        ret.push_back(L3Range::fromAddressSizeWithPolicy(cursor, chunk, policy));
        cursor += chunk;
    }
}
// Container of L3Range entries built on StackVec with a size parameter of 32
// (presumably the number of elements kept in on-stack storage — see stackvec.h to confirm).
using L3RangesVec = StackVec<L3Range, 32>;
template <typename RetVecT>
inline void coverRangeExact(uint64_t address, uint64_t size, RetVecT &ret, uint64_t policy) {
coverRangeExactImpl(address, size, ret, policy);
}
} // namespace NEO