Add test to detect potential race

also add lock inside initialGpgpu

Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-08-08 00:18:54 +00:00
committed by Compute-Runtime-Automation
parent a5614a1c66
commit fea9c9aca7
3 changed files with 278 additions and 22 deletions

View File

@ -130,29 +130,33 @@ CommandQueue::~CommandQueue() {
void CommandQueue::initializeGpgpu() const {
if (gpgpuEngine == nullptr) {
auto &hwInfo = device->getDevice().getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (gpgpuEngine == nullptr) {
auto &hwInfo = device->getDevice().getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto engineRoundRobinAvailable = hwHelper.isAssignEngineRoundRobinSupported(hwInfo) &&
this->isAssignEngineRoundRobinEnabled();
auto engineRoundRobinAvailable = hwHelper.isAssignEngineRoundRobinSupported(hwInfo) &&
this->isAssignEngineRoundRobinEnabled();
if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
engineRoundRobinAvailable = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
engineRoundRobinAvailable = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
}
auto assignEngineRoundRobin =
!this->isSpecialCommandQueue &&
!this->queueFamilySelected &&
!(getCmdQueueProperties<cl_queue_priority_khr>(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
engineRoundRobinAvailable;
if (assignEngineRoundRobin) {
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
} else {
this->gpgpuEngine = &device->getDefaultEngine();
}
this->initializeGpgpuInternals();
}
auto assignEngineRoundRobin =
!this->isSpecialCommandQueue &&
!this->queueFamilySelected &&
!(getCmdQueueProperties<cl_queue_priority_khr>(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
engineRoundRobinAvailable;
if (assignEngineRoundRobin) {
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
} else {
this->gpgpuEngine = &device->getDefaultEngine();
}
this->initializeGpgpuInternals();
}
}

View File

@ -5,9 +5,11 @@
*
*/
#include "shared/test/common/helpers/kernel_binary_helper.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
@ -65,6 +67,256 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesS
EXPECT_GE(mockCsr->flushCalledCount, 1);
EXPECT_LE(mockCsr->flushCalledCount, enqueueCount * threadCount);
EXPECT_EQ(mockedSubmissionsAggregator->peekInspectionId() - 1, (uint32_t)mockCsr->flushCalledCount);
}
template <typename GfxFamily>
struct MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
using CommandQueue::bcsInitialized;
};
HWTEST_F(EnqueueKernelTest, givenTwoThreadsAndBscEnabledWhenEnqueueWriteBufferAndEnqueueNDRangeKernelInLoopThenIsNoRace) {
DebugManagerStateRestore debugRestorer;
DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.set(1);
HardwareInfo hwInfo = *defaultHwInfo.get();
hwInfo.capabilityTable.blitterOperationsSupported = true;
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
std::atomic<bool> startEnqueueProcess(false);
auto iterationCount = 40;
auto threadCount = 2;
constexpr size_t n = 256;
unsigned int data[n] = {};
constexpr size_t bufferSize = n * sizeof(unsigned int);
size_t gws[3] = {1, 0, 0};
size_t gwsSize[3] = {n, 1, 1};
size_t lws[3] = {1, 1, 1};
cl_uint workDim = 1;
KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
std::string testFile;
testFile.append(clFiles);
testFile.append("CopyBuffer_simd16.cl");
size_t sourceSize = 0;
auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
EXPECT_NE(0u, sourceSize);
EXPECT_NE(nullptr, pSource);
MockClDevice mockClDevice{MockDevice::createWithExecutionEnvironment<MockDevice>(&hwInfo, pDevice->executionEnvironment, 0)};
const cl_device_id deviceId = &mockClDevice;
auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, context);
auto queue = clCreateCommandQueue(context, deviceId, 0, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, queue);
const char *sources[1] = {pSource.get()};
auto program = clCreateProgramWithSource(
context,
1,
sources,
&sourceSize,
&retVal);
ASSERT_NE(nullptr, program);
retVal = clBuildProgram(
program,
1,
&deviceId,
nullptr,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto kernel = clCreateKernel(program, "CopyBuffer", &retVal);
ASSERT_NE(nullptr, kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
cl_mem_flags flags = CL_MEM_READ_WRITE;
auto buffer0 = clCreateBuffer(context, flags, bufferSize, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
auto buffer1 = clCreateBuffer(context, flags, bufferSize, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer0);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer1);
EXPECT_EQ(CL_SUCCESS, retVal);
auto function = [&]() {
while (!startEnqueueProcess)
;
for (int i = 0; i < iterationCount; i++) {
retVal = clEnqueueWriteBuffer(queue, buffer0, false, 0, bufferSize, data, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clEnqueueNDRangeKernel(queue, kernel, workDim, gws, gwsSize, lws, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}
};
std::vector<std::thread> threads;
for (auto thread = 0; thread < threadCount; thread++) {
threads.push_back(std::thread(function));
}
startEnqueueProcess = true;
for (auto &thread : threads) {
thread.join();
}
EXPECT_TRUE(NEO::castToObject<MockCommandQueueHw<FamilyType>>(queue)->bcsInitialized);
retVal = clFinish(queue);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseMemObject(buffer0);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseMemObject(buffer1);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseKernel(kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseProgram(program);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseCommandQueue(queue);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseContext(context);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueKernelTest, givenBscEnabledWhenThread1EnqueueWriteBufferAndThread2EnqueueNDRangeKernelInLoopThenIsNoRace) {
DebugManagerStateRestore debugRestorer;
DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.set(1);
HardwareInfo hwInfo = *defaultHwInfo.get();
hwInfo.capabilityTable.blitterOperationsSupported = true;
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
std::atomic<bool> startEnqueueProcess(false);
auto iterationCount = 40;
constexpr size_t n = 256;
unsigned int data[n] = {};
constexpr size_t bufferSize = n * sizeof(unsigned int);
size_t gws[3] = {1, 0, 0};
size_t gwsSize[3] = {n, 1, 1};
size_t lws[3] = {1, 1, 1};
cl_uint workDim = 1;
KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
std::string testFile;
testFile.append(clFiles);
testFile.append("CopyBuffer_simd16.cl");
size_t sourceSize = 0;
auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
EXPECT_NE(0u, sourceSize);
EXPECT_NE(nullptr, pSource);
MockClDevice mockClDevice{MockDevice::createWithExecutionEnvironment<MockDevice>(&hwInfo, pDevice->executionEnvironment, 0)};
const cl_device_id deviceId = &mockClDevice;
auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, context);
auto queue = clCreateCommandQueue(context, deviceId, 0, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, queue);
const char *sources[1] = {pSource.get()};
auto program = clCreateProgramWithSource(
context,
1,
sources,
&sourceSize,
&retVal);
ASSERT_NE(nullptr, program);
retVal = clBuildProgram(
program,
1,
&deviceId,
nullptr,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto kernel = clCreateKernel(program, "CopyBuffer", &retVal);
ASSERT_NE(nullptr, kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
cl_mem_flags flags = CL_MEM_READ_WRITE;
auto buffer0 = clCreateBuffer(context, flags, bufferSize, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
auto buffer1 = clCreateBuffer(context, flags, bufferSize, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer0);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer1);
EXPECT_EQ(CL_SUCCESS, retVal);
std::vector<std::thread::id> threadsIds;
auto functionEnqueueWriteBuffer = [&]() {
while (!startEnqueueProcess)
;
for (int i = 0; i < iterationCount; i++) {
retVal = clEnqueueWriteBuffer(queue, buffer0, false, 0, bufferSize, data, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}
};
auto functionEnqueueNDRangeKernel = [&]() {
while (!startEnqueueProcess)
;
for (int i = 0; i < iterationCount; i++) {
retVal = clEnqueueNDRangeKernel(queue, kernel, workDim, gws, gwsSize, lws, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}
};
std::vector<std::thread> threads;
threads.push_back(std::thread(functionEnqueueWriteBuffer));
threads.push_back(std::thread(functionEnqueueNDRangeKernel));
startEnqueueProcess = true;
for (auto &thread : threads) {
thread.join();
}
EXPECT_TRUE(NEO::castToObject<MockCommandQueueHw<FamilyType>>(queue)->bcsInitialized);
retVal = clFinish(queue);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseMemObject(buffer0);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseMemObject(buffer1);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseKernel(kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseProgram(program);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseCommandQueue(queue);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseContext(context);
EXPECT_EQ(CL_SUCCESS, retVal);
}

View File

@ -29,4 +29,4 @@ add_custom_command(
COMMAND igdrcl_mt_tests --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=${GTEST_REPEAT} ${GTEST_OUTPUT} ${NEO_TESTS_LISTENER_OPTION} --rev_id ${revision_id}
)
add_dependencies(run_${product}_${revision_id}_mt_unit_tests prepare_test_kernels_for_ocl)
add_dependencies(run_${product}_${revision_id}_mt_unit_tests prepare_test_kernels_for_ocl prepare_test_kernels_for_shared)