Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Showing
with 6079 additions and 4286 deletions
This diff is collapsed.
/* Work-item index helpers: local id / group id along dimension N, cast to int. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/*
 * Integer division of a by b that rounds toward negative infinity: when a is
 * negative, the numerator is lowered by (b - 1) before C's truncating
 * division is applied. As the macro name indicates, b is expected to be
 * positive. Both arguments are evaluated more than once.
 */
#define int_floor_div_pos_b(a,b) ( ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) )
/*
 * Computes out = 2 * a over the first n scalar floats, with both buffers
 * accessed as float4 vectors.
 *
 *   a   - input buffer, read as float4 elements
 *   n   - number of scalar float entries to process
 *   out - output buffer, written as float4 elements
 *
 * The kernel is compiled for a required work-group size of (1, 1, 1). The body
 * does not reference lid()/gid(), so every work-item that runs it executes the
 * whole loop itself.
 *
 * The iteration over float4 elements is split into two "slabs":
 *   - bulk slab:  every float4 except the last one, processed with a single
 *                 unconditional vector load/store per element;
 *   - final slab: the last float4, processed one component at a time with a
 *                 bounds check per component so that nothing at scalar index
 *                 >= n is read or written.
 */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float4 const *__restrict__ a, int const n, __global float4 *__restrict__ out)
{
/* bulk slab for 'i_outer' */
/* i_outer runs over 0 .. floor((n + 3) / 4) - 2, i.e. ceil(n / 4) - 2 inclusive. */
for (int i_outer = 0; i_outer <= -2 + int_floor_div_pos_b(3 + n, 4); ++i_outer)
out[i_outer] = 2.0f * a[i_outer];
/* final slab for 'i_outer' */
{
/*
 * Index of the last float4: n - 1 - floor(3 * n / 4), which equals
 * ceil(n / 4) - 1 for integer n.
 * NOTE(review): 3 * n is evaluated in int and would overflow for
 * n > INT_MAX / 3 - confirm the expected range of n.
 */
int const i_outer = -1 + n + -1 * int_floor_div_pos_b(3 * n, 4);
/* Nothing to do when n < 1 (i_outer would be negative in that case). */
if (-1 + n >= 0)
{
/* Component k (s0..s3) is handled only if its scalar index 4 * i_outer + k is <= n - 1. */
if (-1 + -4 * i_outer + n >= 0)
out[i_outer].s0 = 2.0f * a[i_outer].s0;
if (-1 + -4 * i_outer + -1 + n >= 0)
out[i_outer].s1 = 2.0f * a[i_outer].s1;
if (-1 + -4 * i_outer + -1 * 2 + n >= 0)
out[i_outer].s2 = 2.0f * a[i_outer].s2;
if (-1 + -4 * i_outer + -1 * 3 + n >= 0)
out[i_outer].s3 = 2.0f * a[i_outer].s3;
}
}
}
import numpy as np
import pyopencl as cl
import pyopencl.array
import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401
# Example: build the elementwise kernel out[i] = 2*a[i] with loopy, apply a
# sequence of transformations that use a width-4 "vec" tag on the loop and on
# the arrays, and run it once on an OpenCL device.

# Number of scalars per vector; used for the loop split, the array-axis split
# and the host-side reshape so that all three stay in agreement.
VEC_WIDTH = 4
# Comma-separated names of the arrays whose axis is split and tagged.
VECTORIZED_ARRAYS = "a,out"

# OpenCL context and command queue used to allocate the input and to launch.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Input: n consecutive float32 values created on the device.
n = 15 * 10**6
a = cl.array.arange(queue, n, dtype=np.float32)

# Untransformed kernel: one loop over i in [0, n) doubling each entry.
knl = lp.make_kernel("{ [i]: 0<=i<n }", "out[i] = 2*a[i]")
knl = lp.set_options(knl, write_code=True)

# Split i into chunks of VEC_WIDTH and tag the inner part "vec", then split
# axis 0 of both arrays by the same count and tag the array axes "C,vec".
# The transformations are applied in this order; each takes the previous
# kernel and returns a new one.
knl = lp.split_iname(
    knl, "i", VEC_WIDTH, slabs=(0, 1), inner_tag="vec")
knl = lp.split_array_axis(
    knl, VECTORIZED_ARRAYS, axis_nr=0, count=VEC_WIDTH)
knl = lp.tag_array_axes(knl, VECTORIZED_ARRAYS, "C,vec")

# The kernel is invoked with a two-dimensional view of the input whose
# trailing axis has length VEC_WIDTH; the return value is not used.
a_vec = a.reshape(-1, VEC_WIDTH)
knl(queue, a=a_vec, n=n)
This diff is collapsed.
from __future__ import annotations
import loopy.cli
# Invoke loopy's command-line entry point. The call is unguarded, so it runs
# whenever this module is executed or imported.
loopy.cli.main()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from __future__ import division, with_statement
from __future__ import annotations
__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
......