Skip to content
GitLab
Explore
Sign in
Andreas Klöckner
loopy
Compare revisions
268a7f4dd4848f076d8342d47225470db31b40cb to a2ad98af81fdc19a7222e94ed93407d113cfcefc
Commits on Source (3)
write race checking: tighten barrier insertion based on address space
· a5b39c23
Kaushik Kulkarni
authored
Jan 28, 2024
and
Andreas Klöckner
committed
Jan 29, 2024
a5b39c23
test whether barrier insertion is cognizant of the variable address spaces
· f8a1ff95
Kaushik Kulkarni
authored
Jan 28, 2024
and
Andreas Klöckner
committed
Jan 29, 2024
f8a1ff95
remove unnecessary lbarrier from tutorial kernel
· a2ad98af
Kaushik Kulkarni
authored
Jan 28, 2024
and
Andreas Klöckner
committed
Jan 29, 2024
a2ad98af
Hide whitespace changes
Inline
Side-by-side
doc/tutorial.rst
View file @
a2ad98af
...
...
@@ -1059,7 +1059,6 @@ earlier:
acc_k = 0.0f;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
a_fetch[lid(0)] = a[16 * gid(0) + lid(0)];
barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
{
for (int k = 0; k <= 15; ++k)
...
...
loopy/schedule/tools.py
View file @
a2ad98af
...
...
@@ -335,11 +335,15 @@ def get_return_from_kernel_mapping(kernel):
# {{{ check for write races in accesses
def
_check_for_access_races
(
map_a
,
insn_a
,
map_b
,
insn_b
,
knl
,
callables_table
):
def
_check_for_access_races
(
map_a
,
insn_a
,
map_b
,
insn_b
,
knl
,
callables_table
,
address_space
):
"""
Returns *True* if the execution instances of *insn_a* and *insn_b*, accessing
the same variable via access maps *map_a* and *map_b*, result in an access race.
:arg address_space: An instance of :class:`loopy.kernel.data.AddressSpace`
of the variable whose accesses are being checked for a race.
.. note::
The accesses ``map_a``, ``map_b`` lead to write races iff there exists 2
...
...
@@ -348,9 +352,12 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table):
import
pymbolic.primitives
as
p
from
loopy.symbolic
import
isl_set_from_expr
,
aff_from_expr
,
aff_to_expr
from
loopy.kernel.data
import
(
filter_iname_tags_by_type
,
HardwareConcurrentTag
)
HardwareConcurrentTag
,
AddressSpace
)
from
loopy.kernel.tools
import
get_hw_axis_base_for_codegen
assert
address_space
in
[
AddressSpace
.
LOCAL
,
AddressSpace
.
GLOBAL
]
gsize
,
lsize
=
knl
.
get_grid_size_upper_bounds
(
callables_table
,
return_dict
=
True
)
...
...
@@ -470,25 +477,40 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table):
# {{{ Step 5: create the set any(l.i.A != l.i.B) OR any(g.i.A != g.i.B)
space
=
set_a
.
space
unequal_global_id_set
=
isl
.
Set
.
empty
(
set_a
.
get_space
())
unequal_local_id_set
=
isl
.
Set
.
empty
(
set_a
.
get_space
())
unequal_group_id_set
=
isl
.
Set
.
empty
(
set_a
.
get_space
())
equal_group_id_set
=
isl
.
BasicSet
.
universe
(
set_a
.
get_space
())
for
i_l
in
lsize
:
lid_a
=
p
.
Variable
(
f
"
l.
{
i_l
}
.A
"
)
lid_b
=
p
.
Variable
(
f
"
l.
{
i_l
}
.B
"
)
unequal_
g
lo
b
al_id_set
|=
(
isl_set_from_expr
(
space
,
p
.
Comparison
(
lid_a
,
"
!=
"
,
lid_b
))
)
unequal_lo
c
al_id_set
|=
(
isl_set_from_expr
(
space
,
p
.
Comparison
(
lid_a
,
"
!=
"
,
lid_b
))
)
for
i_g
in
gsize
:
gid_a
=
p
.
Variable
(
f
"
g.
{
i_g
}
.A
"
)
gid_b
=
p
.
Variable
(
f
"
g.
{
i_g
}
.B
"
)
unequal_global_id_set
|=
(
isl_set_from_expr
(
space
,
p
.
Comparison
(
gid_a
,
"
!=
"
,
gid_b
))
)
unequal_group_id_set
|=
(
isl_set_from_expr
(
space
,
p
.
Comparison
(
gid_a
,
"
!=
"
,
gid_b
))
)
equal_group_id_set
&=
(
isl_set_from_expr
(
space
,
p
.
Comparison
(
gid_a
,
"
==
"
,
gid_b
))
)
# }}}
return
not
(
set_a
&
set_b
&
unequal_global_id_set
).
is_empty
()
if
address_space
==
AddressSpace
.
GLOBAL
:
return
not
(
set_a
&
set_b
&
(
unequal_local_id_set
|
unequal_group_id_set
)
).
is_empty
()
else
:
return
not
(
set_a
&
set_b
&
unequal_local_id_set
&
equal_group_id_set
).
is_empty
()
class
AccessMapDescriptor
(
enum
.
Enum
):
...
...
@@ -582,7 +604,10 @@ class WriteRaceChecker:
return
_check_for_access_races
(
insn1_amap
,
self
.
kernel
.
id_to_insn
[
insn1
],
insn2_amap
,
self
.
kernel
.
id_to_insn
[
insn2
],
self
.
kernel
,
self
.
callables_table
)
self
.
kernel
,
self
.
callables_table
,
(
self
.
kernel
.
get_var_descriptor
(
var_name
)
.
address_space
))
# }}}
...
...
test/test_loopy.py
View file @
a2ad98af
...
...
@@ -3644,6 +3644,30 @@ def test_barrier_non_zero_hw_lbound():
assert
barrier_between
(
knl
,
"
w_a
"
,
"
w_b
"
)
def test_no_unnecessary_lbarrier(ctx_factory):
    """Regression test: no barrier between ``write_s_a`` and ``write_ao``.

    The kernel writes ``s_a[i_inner]`` and then reads the very same entry
    ``s_a[i_inner]``, with ``i_inner`` tagged ``l.0``, ``i_outer`` tagged
    ``g.0`` and ``s_a`` placed in the local address space. The linearized
    kernel is expected to contain no barrier between the two instructions.

    This regression would fail on loopy.git <= 268a7f4
    (issue reported by @thilinarmtb).
    """
    domain = "{[i_outer, i_inner]: 0 <= i_outer < n and 0 <= i_inner < 16}"
    instructions = """
        <> s_a[i_inner] = ai[i_outer * 16 + i_inner] {id=write_s_a}
        ao[i_outer * 16 + i_inner] = 2.0 * s_a[i_inner] {id=write_ao, dep=write_s_a}
        """

    prog = lp.make_kernel(domain, instructions, assumptions="n>=0")

    # Apply the same transformations, in the same order, as a pipeline of
    # successive rebindings.
    prog = lp.add_dtypes(prog, {"ai": np.float32})
    prog = lp.tag_inames(prog, {"i_inner": "l.0", "i_outer": "g.0"})
    prog = lp.set_temporary_address_space(prog, "s_a", "local")
    prog = lp.prioritize_loops(prog, "i_outer,i_inner")
    prog = lp.preprocess_kernel(prog)

    linearized = lp.get_one_linearized_kernel(
        prog.default_entrypoint, prog.callables_table)

    assert not barrier_between(linearized, "write_s_a", "write_ao")
if
__name__
==
"
__main__
"
:
if
len
(
sys
.
argv
)
>
1
:
exec
(
sys
.
argv
[
1
])
...
...