/*
 * Copyright (C) 2020 Intel Corporation
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them
 * is governed by the express license under which they were provided to you ("License"). Unless
 * the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose
 * or transmit this software or the related documents without Intel's prior written permission.
 *
 * This software and the related documents are provided as is, with no express or implied
 * warranties, other than those that are expressly stated in the License.
*/

// SimpleAdd2 0Gr:0f/0Gr:0f/0Gr:0f/0L:0f 53760000 256 100
// this benchmark only reads from DRAM
__kernel void SimpleAdd2(__global float *pA, __global float *pB, __global float *pC, __local float *pD)
{
    // Benchmark kernel: each work-item loads one element from each of the three global buffers,
    // adds them, and parks the result in the local buffer pD. No global memory is written on the
    // main path.
    //
    // pA, pB, pC - global input buffers, indexed by the global work-item id
    // pD         - local scratch buffer, indexed by the local work-item id
    const int globalIdx = get_global_id(0);
    const int localIdx = get_local_id(0);

    if (localIdx >= 256)
    {
        // Not expected to run. This path reads pD without any barrier, so its behavior is undefined;
        // it exists solely so the compiler sees that pD may feed a global-memory write and therefore
        // cannot discard the store performed on the main path below.
        unsigned slot = 0;
        while (slot < 256)
        {
            pA[globalIdx] += pD[slot];
            slot++;
        }
        return;
    }

    // Main path: sum is evaluated left to right, (pA + pB) + pC, then stored to local memory.
    const float sum = pA[globalIdx] + pB[globalIdx] + pC[globalIdx];
    pD[localIdx] = sum;
}
