Fail program build in shared system USM + stateful access case (OCL)

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-6075
This commit is contained in:
Kamil Kopryk
2021-10-21 10:03:58 +00:00
committed by Compute-Runtime-Automation
parent b98cfdda17
commit 9dabc2db0c
12 changed files with 155 additions and 8 deletions

View File

@@ -34,8 +34,7 @@ cl_int Program::build(
const char *buildOptions,
bool enableCaching) {
cl_int retVal = CL_SUCCESS;
std::string internalOptions;
initInternalOptions(internalOptions);
auto defaultClDevice = deviceVector[0];
UNRECOVERABLE_IF(defaultClDevice == nullptr);
auto &defaultDevice = defaultClDevice->getDevice();
@@ -69,6 +68,9 @@ cl_int Program::build(
} else if (this->createdFrom != CreatedFrom::BINARY) {
options = "";
}
std::string internalOptions;
initInternalOptions(internalOptions);
extractInternalOptions(options, internalOptions);
applyAdditionalOptions(internalOptions);
@@ -166,6 +168,10 @@ cl_int Program::build(
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
}
if (containsStatefulAccess(defaultDevice.getRootDeviceIndex()) && forceToStatelessNeeded() && !isBuiltIn) {
retVal = CL_BUILD_PROGRAM_FAILURE;
}
if (retVal != CL_SUCCESS) {
break;
}

View File

@@ -66,6 +66,7 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
kernelDebugEnabled = clDevices[0]->isDebuggerActive();
}
void Program::initInternalOptions(std::string &internalOptions) const {
auto pClDevice = clDevices[0];
auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess;
internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion());
@@ -74,7 +75,7 @@ void Program::initInternalOptions(std::string &internalOptions) const {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
}
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
if ((isBuiltIn && is32bit) || forceToStatelessNeeded() ||
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
}
@@ -136,6 +137,20 @@ Program::~Program() {
}
}
// Decides whether this program has to be forced to stateless addressing.
//
// Stateful access is preferred when the build options contain the
// smallerThan4gbBuffersOnly compiler option. The UseSmallerThan4gbBuffersOnly
// debug flag, when set to anything other than -1, overrides that preference.
//
// Returns true only when stateful access is not preferred and the first
// device of the program allows shared system allocations.
bool Program::forceToStatelessNeeded() const {
    bool statefulPreferred =
        options.find(NEO::CompilerOptions::smallerThan4gbBuffersOnly.data()) != std::string::npos;

    // A debug flag value different from -1 takes precedence over the build options.
    const auto debugOverride = DebugManager.flags.UseSmallerThan4gbBuffersOnly.get();
    if (debugOverride != -1) {
        statefulPreferred = static_cast<bool>(debugOverride);
    }

    if (statefulPreferred) {
        return false;
    }
    return clDevices[0]->areSharedSystemAllocationsAllowed();
}
cl_int Program::createProgramFromBinary(
const void *pBinary,
size_t binarySize, ClDevice &clDevice) {
@@ -489,6 +504,22 @@ cl_int Program::packDeviceBinary(ClDevice &clDevice) {
return CL_SUCCESS;
}
bool Program::containsStatefulAccess(uint32_t rootDeviceIndex) const {
auto &buildInfo = buildInfos[rootDeviceIndex];
for (const auto &kernelInfo : buildInfo.kernelInfoArray) {
for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) {
auto isStatefulAccess = arg.is<ArgDescriptor::ArgTPointer>() &&
(isValidOffset(arg.as<ArgDescPointer>().bindless) ||
isValidOffset(arg.as<ArgDescPointer>().bindful));
if (isStatefulAccess) {
return true;
}
}
}
return false;
}
void Program::setBuildStatus(cl_build_status status) {
for (auto &deviceBuildInfo : deviceBuildInfos) {
deviceBuildInfo.second.buildStatus = status;

View File

@@ -283,6 +283,10 @@ class Program : public BaseObject<_cl_program> {
}
protected:
bool forceToStatelessNeeded() const;
MOCKABLE_VIRTUAL bool containsStatefulAccess(uint32_t rootDeviceIndex) const;
MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice);
cl_int packDeviceBinary(ClDevice &clDevice);
@@ -366,6 +370,7 @@ class Program : public BaseObject<_cl_program> {
bool isBuiltIn = false;
bool kernelDebugEnabled = false;
bool containsStatefulAccesses = false;
uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
std::mutex lockMutex;
uint32_t exposedKernels = 0;