/*
 * Copyright (c) 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "runtime/gen8/hw_info.h"
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_queue/gpgpu_walker.inl"

namespace OCLRT {

/**
 * BDWFamily specialization of the WADisableLSQCROPERFforOCL workaround.
 *
 * Nothing is written to the command stream unless the kernel's execution
 * environment reports UsesFencesForReadWriteImages.
 *
 * @param pCommandStream  stream that receives the workaround commands
 * @param kernel          kernel whose execution environment is inspected
 * @param disablePerfMode true  - set L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4;
 *                        false - emit a stalling PIPE_CONTROL, then clear
 *                                that bit again
 */
template <>
void GpgpuWalkerHelper<BDWFamily>::applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
    // The workaround only applies to kernels that use fences for read/write images.
    if (!kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
        return;
    }

    if (disablePerfMode) {
        // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
        GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, ALU_OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
        return;
    }

    // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
    using PIPE_CONTROL = typename BDWFamily::PIPE_CONTROL;
    auto pCmd = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
    *pCmd = PIPE_CONTROL::sInit();
    pCmd->setCommandStreamerStallEnable(true);

    // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
    GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, ALU_OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS);
}

/**
 * Command-stream space to reserve for both applications of the
 * WADisableLSQCROPERFforOCL workaround (set bit + clear bit) on BDWFamily.
 *
 * @param pKernel kernel to inspect; may be nullptr
 * @return 0 when pKernel is nullptr or the kernel does not use fences for
 *         read/write images; otherwise one PIPE_CONTROL plus twice the size
 *         of a register read-modify-write sequence.
 */
template <>
size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) {
    using MI_LOAD_REGISTER_REG = typename BDWFamily::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename BDWFamily::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename BDWFamily::PIPE_CONTROL;
    using MI_MATH = typename BDWFamily::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename BDWFamily::MI_MATH_ALU_INST_INLINE;

    if ((pKernel == nullptr) || !pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
        return 0;
    }

    // Size of one register read-modify-write sequence.
    // NOTE(review): presumably mirrors what addAluReadModifyWriteRegister emits
    // (defined outside this file) - confirm if that helper changes.
    const size_t readModifyWriteSize = 2 * sizeof(MI_LOAD_REGISTER_REG) +
                                       sizeof(MI_LOAD_REGISTER_IMM) +
                                       sizeof(PIPE_CONTROL) +
                                       sizeof(MI_MATH) +
                                       NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE);

    // Stalling PIPE_CONTROL from the "clear" path + 2 WADisableLSQCROPERFforOCL WAs
    return sizeof(PIPE_CONTROL) + readModifyWriteSize * 2;
}

template class GpgpuWalkerHelper<BDWFamily>;

template struct EnqueueOperation<BDWFamily>;

} // namespace OCLRT