diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 08e3c7a7c75cc169ac0f2f3090aab9244b27db43..e8f7ec61cc1b759dc4eef6b79b0f6eb57f8f20d7 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -258,20 +258,8 @@ def generate_code(kernel, with_annotation=False): mod.extend([ LiteralLines(r""" - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) - - - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) - #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) - """), Line()]) @@ -302,6 +290,24 @@ def generate_code(kernel, with_annotation=False): from loopy.codegen.loop import set_up_hw_parallel_loops gen_code = set_up_hw_parallel_loops(kernel, 0, codegen_state) + gen_code_str = str(gen_code) + + if "int_floor_div" in gen_code_str: + mod.extend(""" + #define int_floor_div(a,b) \ + (( (a) - \ + ( ( (a)<0 ) != ( (b)<0 )) \ + *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ + / (b) ) + """) + + if "int_floor_div_pos_b" in gen_code_str: + mod.extend(""" + #define int_floor_div_pos_b(a,b) ( \ + ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ + ) + """) + body.append(Line()) if isinstance(gen_code.ast, Block):