cl_cache: do not call fcl when not needed

A call to FCL can be costly. We don't need it when the kernel source is
simple and does not contain '#include'. In this case we can compute the
hash directly from the kernel source.

Change-Id: I0455be57d9ee13919a53c145e3feeb00a113d71e
This commit is contained in:
Artur Harasimiuk
2018-04-03 00:00:44 +02:00
committed by sys_ocldev
parent fbf00d38bb
commit 07a63c91df
2 changed files with 94 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,6 +40,12 @@ CompilerInterface *CompilerInterface::pInstance = nullptr;
bool CompilerInterface::useLlvmText = false;
std::mutex CompilerInterface::mtx;
// Selects how build() derives the binary-cache key for a program.
enum CachingMode {
    // Caching is disabled; no cache key is computed.
    None = 0,
    // Key is computed straight from the raw input source and options,
    // before the front-end compiler (FCL) is invoked.
    Direct = 1,
    // Key is computed from the intermediate representation produced by
    // FCL, together with the FCL options.
    PreProcess = 2
};
CompilerInterface::CompilerInterface() = default;
CompilerInterface::~CompilerInterface() = default;
NO_SANITIZE
@@ -69,16 +75,39 @@ cl_int CompilerInterface::build(
}
}
CachingMode cachingMode = None;
if (enableCaching) {
if ((highLevelCodeType == IGC::CodeType::oclC) && (std::strstr(inputArgs.pInput, "#include") == nullptr)) {
cachingMode = CachingMode::Direct;
} else {
cachingMode = CachingMode::PreProcess;
}
}
uint32_t numDevices = static_cast<uint32_t>(program.getNumDevices());
for (uint32_t i = 0; i < numDevices; i++) {
const auto &device = program.getDevice(i);
UNRECOVERABLE_IF(intermediateCodeType == IGC::CodeType::undefined);
bool binaryLoaded = false;
std::string kernelFileHash;
if (cachingMode == CachingMode::Direct) {
kernelFileHash = cache->getCachedFileName(device.getHardwareInfo(),
ArrayRef<const char>(inputArgs.pInput, inputArgs.InputSize),
ArrayRef<const char>(inputArgs.pOptions, inputArgs.OptionsSize),
ArrayRef<const char>(inputArgs.pInternalOptions, inputArgs.InternalOptionsSize));
if (cache->loadCachedBinary(kernelFileHash, program)) {
continue;
}
}
auto inSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), inputArgs.pInput, inputArgs.InputSize);
auto fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), inputArgs.pOptions, inputArgs.OptionsSize);
auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), inputArgs.pInternalOptions, inputArgs.InternalOptionsSize);
CIF::RAII::UPtr_t<CIF::Builtins::BufferSimple> intermediateRepresentation;
if (highLevelCodeType != IGC::CodeType::undefined) {
auto fclTranslationCtx = createFclTranslationCtx(device, highLevelCodeType, intermediateCodeType);
auto fclOutput = translate(fclTranslationCtx.get(), inSrc.get(),
@@ -103,9 +132,7 @@ cl_int CompilerInterface::build(
intermediateRepresentation.reset(inSrc.get());
}
bool binaryLoaded = false;
std::string kernelFileHash;
if (enableCaching) {
if (cachingMode == CachingMode::PreProcess) {
kernelFileHash = cache->getCachedFileName(device.getHardwareInfo(), ArrayRef<const char>(intermediateRepresentation->GetMemory<char>(), intermediateRepresentation->GetSize<char>()),
ArrayRef<const char>(fclOptions->GetMemory<char>(), fclOptions->GetSize<char>()),
ArrayRef<const char>(fclInternalOptions->GetMemory<char>(), fclInternalOptions->GetSize<char>()));

View File

@@ -357,7 +357,7 @@ TEST_F(CompilerInterfaceCachedTests, notCachedAndIgcFailed) {
TranslationArgs inputArgs;
inputArgs.pInput = new char[128];
strcpy_s(inputArgs.pInput, 128, "__kernel k() {}");
strcpy_s(inputArgs.pInput, 128, "#include \"header.h\"\n__kernel k() {}");
inputArgs.InputSize = static_cast<uint32_t>(strlen(inputArgs.pInput));
MockCompilerDebugVars fclDebugVars;
@@ -388,7 +388,7 @@ TEST_F(CompilerInterfaceCachedTests, wasCached) {
TranslationArgs inputArgs;
inputArgs.pInput = new char[128];
strcpy_s(inputArgs.pInput, 128, "__kernel k() {}");
strcpy_s(inputArgs.pInput, 128, "#include \"header.h\"\n__kernel k() {}");
inputArgs.InputSize = static_cast<uint32_t>(strlen(inputArgs.pInput));
MockCompilerDebugVars fclDebugVars;
@@ -419,7 +419,7 @@ TEST_F(CompilerInterfaceCachedTests, builtThenCached) {
TranslationArgs inputArgs;
inputArgs.pInput = new char[128];
strcpy_s(inputArgs.pInput, 128, "__kernel k() {}");
strcpy_s(inputArgs.pInput, 128, "#include \"header.h\"\n__kernel k() {}");
inputArgs.InputSize = static_cast<uint32_t>(strlen(inputArgs.pInput));
MockCompilerDebugVars fclDebugVars;
@@ -441,3 +441,63 @@ TEST_F(CompilerInterfaceCachedTests, builtThenCached) {
gEnvironment->fclPopDebugVars();
gEnvironment->igcPopDebugVars();
}
// Builds an include-free kernel while the mock cache reports a hit and both
// compilers are forced to fail; success proves the build was served from the cache.
TEST_F(CompilerInterfaceCachedTests, givenKernelWithoutIncludesAndBinaryInCacheWhenCompilationRequestedThenFCLIsNotCalled) {
    constexpr size_t sourceCapacity = 128;

    MockContext context(pDevice, true);
    MockProgram program(&context, false);
    BinaryCacheMock cache;
    // make the mock cache report a successful load
    cache.loadResult = true;

    TranslationArgs inputArgs;
    inputArgs.pInput = new char[sourceCapacity];
    strcpy_s(inputArgs.pInput, sourceCapacity, "__kernel k() {}");
    inputArgs.InputSize = static_cast<uint32_t>(strlen(inputArgs.pInput));

    // FCL and IGC are both configured to fail any compilation request,
    // so CL_SUCCESS can only come from the cached binary
    MockCompilerDebugVars fclDebugVars;
    fclDebugVars.fileName = gEnvironment->fclGetMockFile();
    fclDebugVars.forceBuildFailure = true;
    gEnvironment->fclPushDebugVars(fclDebugVars);

    MockCompilerDebugVars igcDebugVars;
    igcDebugVars.fileName = gEnvironment->igcGetMockFile();
    igcDebugVars.forceBuildFailure = true;
    gEnvironment->igcPushDebugVars(igcDebugVars);

    // install the mock cache, remembering the previous one for restoration
    auto previousCache = pCompilerInterface->replaceBinaryCache(&cache);

    const auto buildStatus = pCompilerInterface->build(program, inputArgs, true);
    EXPECT_EQ(CL_SUCCESS, buildStatus);

    // restore the original cache and release test resources
    pCompilerInterface->replaceBinaryCache(previousCache);
    delete[] inputArgs.pInput;

    gEnvironment->fclPopDebugVars();
    gEnvironment->igcPopDebugVars();
}
// Builds a kernel containing '#include' while the mock cache reports a hit and
// FCL is forced to fail; the build failure proves FCL was still invoked.
TEST_F(CompilerInterfaceCachedTests, givenKernelWithIncludesAndBinaryInCacheWhenCompilationRequestedThenFCLIsCalled) {
    constexpr size_t sourceCapacity = 128;

    MockContext context(pDevice, true);
    MockProgram program(&context, false);
    BinaryCacheMock cache;
    // make the mock cache report a successful load
    cache.loadResult = true;

    TranslationArgs inputArgs;
    inputArgs.pInput = new char[sourceCapacity];
    strcpy_s(inputArgs.pInput, sourceCapacity, "#include \"file.h\"\n__kernel k() {}");
    inputArgs.InputSize = static_cast<uint32_t>(strlen(inputArgs.pInput));

    // FCL is configured to fail; a source with '#include' has to pass through it
    MockCompilerDebugVars fclDebugVars;
    fclDebugVars.fileName = gEnvironment->fclGetMockFile();
    fclDebugVars.forceBuildFailure = true;
    gEnvironment->fclPushDebugVars(fclDebugVars);

    // install the mock cache, remembering the previous one for restoration
    auto previousCache = pCompilerInterface->replaceBinaryCache(&cache);

    const auto buildStatus = pCompilerInterface->build(program, inputArgs, true);
    EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, buildStatus);

    // restore the original cache and release test resources
    pCompilerInterface->replaceBinaryCache(previousCache);
    delete[] inputArgs.pInput;

    gEnvironment->fclPopDebugVars();
}