From 653a291b7b0bcd00eba77e42786065276c968fb0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Sep 2017 13:12:51 -0500 Subject: [PATCH 01/18] Add first stab at l^2 stickout tracking --- boxtree/tree.py | 16 +++++- boxtree/tree_build.py | 25 ++++++++-- boxtree/tree_build_kernels.py | 90 +++++++++++++++++++++++++--------- doc/images/linf-l2.png | Bin 0 -> 12818 bytes doc/linf-l2.tikz | 41 ++++++++++++++++ doc/make-images.sh | 3 ++ doc/tikz2png | 57 +++++++++++++++++++++ 7 files changed, 202 insertions(+), 30 deletions(-) create mode 100644 doc/images/linf-l2.png create mode 100644 doc/linf-l2.tikz create mode 100755 doc/make-images.sh create mode 100755 doc/tikz2png diff --git a/boxtree/tree.py b/boxtree/tree.py index 84b0a94..a22a102 100644 --- a/boxtree/tree.py +++ b/boxtree/tree.py @@ -56,7 +56,7 @@ class box_flags_enum(Enum): # noqa # {{{ tree data structure class Tree(DeviceDataRecord): - """A quad/octree consisting of particles sorted into a hierarchy of boxes. + r"""A quad/octree consisting of particles sorted into a hierarchy of boxes. Optionally, particles may be designated 'sources' and 'targets'. They may also be assigned radii which restrict the minimum size of the box into which they may be sorted. @@ -110,6 +110,20 @@ class Tree(DeviceDataRecord): :math:`l^\infty` circles given by :attr:`source_radii` may stick out the box in which they are contained. A scalar. + .. attribute:: extent_norm + + One of ``None``, ``"l2"`` or ``"linf"``. If *None*, particles do not have + extent. If not *None*, indicates the norm with which extent-bearing particles + are determined to lie 'inside' a box's, taking into account the box's + :attr:`stick_out_factor`. + + This image illustrates the difference in semantics: + + .. image:: images/linf-l2.png + + In the figure, the box has radius :math:`R`, the particle has radius + :math:`r`, and :attr:`stick_out_factor` is denoted :math:`\alpha`. + .. attribute:: nsources .. attribute:: ntargets diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 9ad0692..bef9751 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -61,13 +61,13 @@ class TreeBuilder(object): @memoize_method def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_have_extent, + sources_are_targets, srcntgts_extent_norm, kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_have_extent, + sources_are_targets, srcntgts_extent_norm, self.morton_nr_dtype, self.box_level_dtype, kind=kind) @@ -77,7 +77,9 @@ class TreeBuilder(object): max_particles_in_box=None, allocator=None, debug=False, targets=None, source_radii=None, target_radii=None, stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, **kwargs): + max_leaf_refine_weight=None, wait_for=None, + extent_norm="linf", + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -114,6 +116,8 @@ class TreeBuilder(object): :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` instances for whose completion this command waits before starting execution. + :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect + to which particle stick-out is measured. See :attr:`Tree.extent_norm`. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -140,9 +144,19 @@ class TreeBuilder(object): sources_are_targets = targets is None sources_have_extent = source_radii is not None targets_have_extent = target_radii is not None + + if extent_norm not in ["linf", "l2"]: + raise ValueError("unexpected value of 'extent_norm': %s" + % extent_norm) + + srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent + if not srcntgts_have_extent: + srcntgts_extent_norm = None + + del extent_norm - if srcntgts_have_extent and targets is None: + if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " "any kind of radii") @@ -192,7 +206,7 @@ class TreeBuilder(object): knl_info = self.get_kernel_info(dimensions, coord_dtype, particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_have_extent, + sources_are_targets, srcntgts_extent_norm, kind=kind) logger.info("tree build: start") @@ -1559,6 +1573,7 @@ class TreeBuilder(object): root_extent=root_extent, stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, bounding_box=(bbox_min, bbox_max), level_start_box_nrs=level_start_box_nrs, diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index a158865..5970b06 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -324,7 +324,8 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// %for ax in axis_names: // Most FMMs are isotropic, i.e. global_extent_{x,y,z} are all the same. // Nonetheless, the gain from exploiting this assumption seems so - // minimal that doing so here didn't seem worthwhile. + // minimal that doing so here didn't seem worthwhile in the + // srcntgts_extent_norm == "linf" case. coord_t global_min_${ax} = bbox->min_${ax}; coord_t global_extent_${ax} = bbox->max_${ax} - global_min_${ax}; @@ -346,19 +347,24 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// ((srcntgt_${ax} - global_min_${ax}) / global_extent_${ax}) * (1U << (1 + particle_level))); - %if srcntgts_have_extent: - // Need to compute center to compare excess with stick_out_factor. - coord_t next_level_box_center_${ax} = - global_min_${ax} - + global_extent_${ax} - * (${ax}_bits + one_half) - * next_level_box_size_factor; - - coord_t next_level_box_stick_out_radius_${ax} = - box_radius_factor - * global_extent_${ax} - * next_level_box_size_factor; + // Need to compute center to compare excess with stick_out_factor. + // Unused if no stickout, relying on compiler to eliminate this. + const coord_t next_level_box_center_${ax} = + global_min_${ax} + + global_extent_${ax} + * (${ax}_bits + one_half) + * next_level_box_size_factor; + + const coord_t next_level_box_stick_out_radius_${ax} = + box_radius_factor + * global_extent_${ax} + * next_level_box_size_factor; + %endfor + + %if srcntgts_extent_norm == "linf": + %for ax in axis_names: + // stop descent here if particle sticks out of next-level box stop_srcntgt_descent = stop_srcntgt_descent || (srcntgt_${ax} + srcntgt_radius >= next_level_box_center_${ax} @@ -367,8 +373,39 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// (srcntgt_${ax} - srcntgt_radius < next_level_box_center_${ax} - next_level_box_stick_out_radius_${ax}); - %endif - %endfor + %endfor + + %elif srcntgts_extent_norm == "l2": + + coord_t next_level_box_stick_out_radius = + box_radius_factor + * global_extent_x /* assume isotropy */ + * next_level_box_size_factor; + + coord_t next_level_box_center_to_srcntgt_bdry_l2_dist_squared = 0 + %for ax in axis_names: + + (srcntgt_${ax} - next_level_box_center_${ax}) + * (srcntgt_${ax} - next_level_box_center_${ax}) + %endfor + + srcntgt_radius*srcntgt_radius + ; + + // stop descent here if particle sticks out of next-level box + stop_srcntgt_descent = stop_srcntgt_descent || + (next_level_box_center_to_srcntgt_bdry_l2_dist_squared + >= ${dimensions} + * next_level_box_stick_out_radius + * next_level_box_stick_out_radius); + + %elif srcntgts_extent_norm is None: + // nothing to do + + %else: + <% + raise ValueError("unexpected value of 'srcntgts_extent_norm': %s" + % srcntgts_extent_norm) + %> + %endif // Pick off the lowest-order bit for each axis, put it in its place. int level_morton_number = 0 @@ -1244,8 +1281,11 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( def get_tree_build_kernel_info(context, dimensions, coord_dtype, particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_have_extent, + sources_are_targets, srcntgts_extent_norm, morton_nr_dtype, box_level_dtype, kind): + """ + :arg srcntgts_extent_norm: one of ``None``, ``"l2"`` or ``"linf"`` + """ level_restrict = (kind == "adaptive-level-restricted") adaptive = not (kind == "non-adaptive") @@ -1269,7 +1309,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, dev = context.devices[0] morton_bin_count_dtype, _ = make_morton_bin_count_type( dev, dimensions, particle_id_dtype, - srcntgts_have_extent) + srcntgts_have_extent=srcntgts_extent_norm is not None) from boxtree.bounding_box import make_bounding_box_dtype bbox_dtype, bbox_type_decl = make_bounding_box_dtype( @@ -1301,7 +1341,8 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, level_restrict=level_restrict, sources_are_targets=sources_are_targets, - srcntgts_have_extent=srcntgts_have_extent, + srcntgts_have_extent=srcntgts_extent_norm is not None, + srcntgts_extent_norm=srcntgts_extent_norm, enable_assert=False, enable_printf=False, @@ -1379,12 +1420,12 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, + [VectorArg(coord_dtype, ax) for ax in axis_names] + ([VectorArg(coord_dtype, "srcntgt_radii")] - if srcntgts_have_extent else []) + if srcntgts_extent_norm is not None else []) ) morton_count_scan_arguments = list(common_arguments) - if srcntgts_have_extent: + if srcntgts_extent_norm is not None: morton_count_scan_arguments += [ (ScalarArg(coord_dtype, "stick_out_factor")) ] @@ -1402,7 +1443,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, ] + ["%s" % ax for ax in axis_names] + (["srcntgt_radii, stick_out_factor"] - if srcntgts_have_extent else []))), + if srcntgts_extent_norm is not None else []))), scan_expr="scan_t_add(a, b, across_seg_boundary)", neutral="scan_t_neutral()", is_segment_start_expr="box_start_flags[i]", @@ -1428,7 +1469,8 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, ), var_values=( ("dimensions", dimensions), - ("srcntgts_have_extent", srcntgts_have_extent), + ("srcntgts_have_extent", srcntgts_extent_norm is not None), + ("srcntgts_extent_norm", srcntgts_extent_norm), ("adaptive", adaptive), ("padded_bin", padded_bin), ("level_restrict", level_restrict), @@ -1520,7 +1562,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, # END KERNELS IN LEVEL LOOP - if srcntgts_have_extent: + if srcntgts_extent_norm is not None: extract_nonchild_srcntgt_count_kernel = \ EXTRACT_NONCHILD_SRCNTGT_COUNT_TPL.build( context, @@ -1636,7 +1678,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, ("box_id_t", box_id_dtype), ), var_values=( - ("srcntgts_have_extent", srcntgts_have_extent), + ("srcntgts_have_extent", srcntgts_extent_norm is not None), ("sources_are_targets", sources_are_targets), ), more_preamble=generic_preamble) diff --git a/doc/images/linf-l2.png b/doc/images/linf-l2.png new file mode 100644 index 0000000000000000000000000000000000000000..0a24f867793b1aca0ca7808a80f8459be57c5dab GIT binary patch literal 12818 zcma)i2UL^K(s$?rQk0@19qAp6NRti$1SEh!Xi|mHdoN1wy*Gh`5RfX74odF|gd$R; zOQc8%NdMyhzUO}B{oZqK&Pn#!ncvLLGdsK4+1=O|+G-SJ3}gTRfI=Ol3J0LZ`zUi|a) z`=5&W|ELDX_?PZ~75~=xe>V7U;eR9l4f~h)zXAW@tNrJ&e>D^X04U>R3;+OM0M3Uq zzxV2JRa-BxwjMw^rft;Vj&J26d#{Tcjx&0Vj9YY@*$Q%xBVL&OBaRY=`H#-6xx@Sm^s`BpXVfy7H)ONC^G^e#D2 zDhcLD0ssiJt1Bz$`_1nbI)v{?1*D7|b}mgVB3i4og`)*;`;Lv-M!y-=%K1dd^}wz{ z{uUuIy6^C&YfZ&xsO^1Un%Vx*O2Hn6?;94wn<+3KJ+RE0urpOO&j|Lt%Bb~?$r4uGHpNe z2E2ykBI>EX;uY9 z*Tb^my;Nd9tqCgYX6ReF4oa$t0e~DbW-E^S_wWc~gGgT2OA=7KM=`OK+W}nK zAAfoZ@-9|=Nb`ksj#77tFK-GY;djr|7 z9e|HfZRzV`L)uFutj}%M==N=O)o+t3huw|%pk>ZXnQHf7frG$^ZE3Rkn31_#gx2F1 z;oP@x_iHzP$e&6AAm7HtsCcm^mU#6^odYHY%g=ivGHyWJE^N} z6#LC@hFPP~l47b>YgQJEzOToN!DEGcr)ud>=Nd&ve6NB^q=_d<>qZwIQ=PITC+i@{ z$IiA7{v2E@oIVshu$n5jT9R{+3%^oNTsLE1V*1k3gIHME>R-ve*jjURIQu>kqbdz_ zs;37zi?FbK$(~vpxJZ>-mg_1K+FDavL^v?AqDo}y9+>#wIH{`j(#k_={Qq1IFE#N5 z^3?vo(xpJSJfdWnlMo|QTiHalerK0Hl1$!Rf!1P~Z_tOoodn7t#X8vooT`THGUnr7 z0+vHFi0q#ChM_0x8lHDw1>%~V-O#wVAZs>$YFeA2l9wwQPX->rw^VB{>YB^L@5>BESK1 zCLS26YS-@RncDcUCjp~LnI~j67xqACGYcn6$n@L-Km0V3AHm=I#NTz(r{Ojg#7p8B}o7{{*LuZiHu5O<=f3r(whE+99Gn=qVULlk{2^qnM=ABs&Pza z)`6@jKoLjL=yAo@_Ocm;l*d!HK+u@Zs!g|^&=(Dpse$-$ykC!|GUy0ln_NS^g`I8f ze6j=F)rRq6?7ak_u|)ps-E$AH%R0ktu?p*_0qWl_mND(@ z)fS)4Y^;R&#IgfE4ib2##$W}seyr@m0@N25x3x~&Xo>G&Q1iN1@*N(0oL#d%?KivQ z{O`}{7F1O2byHi=A5@rg;-4FTnIvQu#z>Q}2q(8LZ(`(6l~@Kw7zl=n4JG4r!_f0D zq<_0_9F(4N)nf6G-tN8ukZq7_0QYF5yo@hn7<%Nu#&k-qz|h3~_}zt-=mraSw~X)M zTl5HNUBtxf8ri*Apf6-=ix zAM#MHUYHN;X>iwhM9fL~IkmJ*?O%=Qka4mV#8K;h%Y6k?NByAN$^2fdKpJ8Jhw}={ z+@m9kG5)t`hOd)QsB7gc0nD65HgWU(%FlVB2#q|U9p*;)wk6busF~ku;sy%WytgT0 zF)Uz}SZNlNA@$#WjfZ@oE@td6!0e66A>JYLjG1kAyaTiA@l-TEy>Rr@{Emq2CN~_+ z2Y^4^88ON=^guW}?UtR8<`mdOj%wlTK(=dbBy*d&u6~Xo?7A*J^T z#n|d@Kj7;4;Vbnf20K~ml#4j65->uq|b0pI@YLeGO7BY^o0a87GaJAQpwvh z2CP_vF@p@7SNleJ{x@xhBL!qrd|+ra8*J?%HH?{iOY zIJ%zugFS1k8V1I94ntAI7dK|Kw9unIP*4V~WKVH}>_>E`GLos&_?|TLGgPr@NGWhm zuH}+o+qORle6;vsE-PStFmOlDb$GyjF(#zc*G<#QjpFP)1i)&H#m-g_ zw3Cz7gV6$5h0ihU2y`?n&2Cs2TE=Z4SRcA5K~}U;8@Q1vb=SOv66{IWdqE+|g-ze% zl_lc-#1Hgcb~zbj#S*feqDFJjWDLW$GQ&5OKeEn$L_m{ngOkJmn52m!QML9j>+iOwm zvH#H|4tDu_dkPT>D!5*}eC+~MfYR?J53I?tgKyR@UoY8^uwofmI%{C`lXqcu8#Nvf zdpXaPfGpP=^_AmnMT9d9?ZT5KsWbL-;7D$)@cOl)L~RbYw~bEXFNc)#i__5SYvJn+ z9l4kc%TlyC&)j)}cIT}JXMbw1u^Xp-ALY-T3eY5hd^f$AbYZ!mQ|xI=`f><%>SN!z&z;7Jip znGemrK4QmiMK|0Wy3wYbPs30w-gxa=F%2k|Zf%CA+DzoN$4u!EH1B8amN^8J&=M)T z1gu~MO0QC2{{%okyM|gK>6|q9o}GXGt{;xxiIG5;f#b~)v6{}KyQZ}5TDaLZDI#^J zo_&u)&U~(5b z9v)O$HLU`ai0D|vH*-m$v6?mmGF0x#ih7G55CalKK46iH$uWEj=Kt2Mn}rT9AwiPj zg9CF5e1a}nt@0nl2@Srik1TtHy!y;5oRY`NiV|*>aXtBmrCx1EuBHxF3%|fkScG^; zCH3$1+KkHf!g!*{=^YN%s*5|fwf?d~kb1`XKpn<)W7rMbBUCAqg^jwi;5?ZxW{?$_ z_>)3dfaK16u_g$r66mS^@TxAm27rfjF0(xA2tDrv`DoAqz*Hi#_ip^G>?@0lJ*kqvv<#rT zLqd=bp=Z}j1^|%kr$)4}fTKu}#~Jw@xhYcQ8Qkui)xCb3NQ)&z zhOyKCCgn!i@fMg$^aE{53IFeu)37faaHeX}CbaJ`113@Ps%M3yHIa)b3{5Z!8grXC z`JHxpKj0MUrg;UknsDeVhd-ljISSBOE77}O*Ms(&YTCie{0wD6bFFB( z6N-fbG)E`rn^9>iyM5)S;tK+Q<|J|qF%sSsC{twFyN+`^B4n5U{=GUHq%PMHtsOkX zuI^~z%j-|Pz>$gO)bC(l`o^m4^XSvsbD|w(=!$HlF{R?clQw+p#$+fN0xhsizA%_6 z<)V{Wl^yU-v*RAp%s5^fL8<5|7x>|me0H85w47N+!UL}D#!u1~fD8?~G42Wr1~HNE z@r-(<&?kFV3PvU1`T3x)-;5@%Sq`GpWGgu$F{uE)rejS2}eT?&T+~wZ-qp z(0mrBhLJ9j#cZYu0J}%GB{zCufh==A2a*EVxvn?daY zW)!Wa7&SWeT*zS7{M)Pmz!72yH%Ax#G!ek+uIRH{ey=)?A_pKzUX3}QG{2aHgN1K| zj3`0X@4}cLrUf`8`<9>?J5Dyj{tnW5He_~^mO%RX%6q>bWy(l2W&la@kFZX4fUlHr zbDGH`?BX`;oW1_lFPvU7oL-|8;XivpRyqZZ%9z(B)$ZT$kaVFq&D+ar7smO@H(IHnfAT1+4)XsKq&+0B& zk#K^c*L!eRJCdciMwb=EgqC}Qj(~%$Z&>r;fo==p=2M~?9h00^k@!emy>M2bWeGZK z0gmn=$z9vq?VW6FP#;_Pt*P(^t-dCPQ_O>UD+C;HQK!<$UTcpP0L!hL9n%%Q^eCBI zKofy!=rw|OKMjSyCa+tw?X0&;IX`khY4(!Qfk~d*zC{CNmubC_XD6sY|JH)nQSV>F zL!MbM(+LB2OBJET4*K-4n(g!QQq8e89U*~41)bi$qm9ZVatEOq@k zxYA4&CqM?u$l%fz>;1fgvtz%|v+&vL`$GelXWUVE3c#J`n4aj`&VZYq+AcaN@sxm= z7i9?kjI}Fx>t$EG1}Qk%%{Qpc)V$asdjI<8q1y4j zaB;CvcGFsb-l`H$=6n%D9@x9&PqoqV8UU9=@4Q&&L6ZA} z4HvH3EG}2KLZ3TbuNB{{-V1%#G@?)jg0?-s`OJxBHWn+p&tG|G2%Mk5ha91Pi;my~ zR~e0Y0g#+8svYo=q?)jC^c)E|{+>zAE#kjb7sQ_|Vvt~h$qYL#I9O4AjSNnbtSAwK zX}3hZ{Zt|_B2M*29_qe%g!|6~3NEKCn2APFHF)YH>Bjr1VKokCn|x_hu&fs@p(Smv z4g4EFppj|0nl6HuJ6aY+PS@EP4Hy|h5A_m0 z+=5`dOJH2d8K_Uc744DS%~40xKm3jd^$Z_hiW&zLdJ%xLQ6d+YpSunWdrsgk7IZIG zypC7~k`lPbW8}E}Rxa9!kY+OO5NM?(3Akw)k4~E$W}c6S6imqr*G1|TY^Z6!cr~!j z%Ns{o6wf}=on&j=$N60Rl7{Vjl3sFBES*HH)9|P!h*~4xHTu%_jm5QU_WTRFK%U+<=t0rvLGM>Yo^NxwXFi;n zV69A(QltS(s}!Fv8!Bm5gfzKi-6;0wJMnp~*RMi4LU@E&98PaJ?+|Ysf!r#yZMRVo zK5KfJ)4|i&TBO;tJs(X?F)=8$j<@>v=bDKXn*1swxx{dvVF_xzX?f_1$O{U6K{OS` zwBl7$YV$C~FMD|db+h-D_fw@B+&7uNhQ_3?cIZ#ruU%R(wxn)IXerZ-3EZ@EhukUR z%G36Wd3*9h*Sunl%R$vDO^!R%<=PR_hz{w3*T7vm;?DUg-}ichs=vZp=6Lh&hAJdS z`K#(qdz9lLt003n@&*LRNTn`@pAc3mikWT$%;&!b+#iMsRU){(CFS$FL=pE9*S2T& zaZHCUBc~a1RnN-!WbSa~ZqzJN#mh{)2&S`oD|hM%GG=`ldKgw=0Oda4HJk_)oW%qh zIL-(O3X)anycNB#7Yc{HL=_oO>XJ%cKp!t5%99pAOjN2z)RU3= zRJJzS0xsV2>g^!^`|0!UBZpkDa*OHY>k_9@DfmVvb%WLb&@~uS+lU^nw0@_v_^d|* z(|p!pKv4~;U}mU^tU`zXjStZbRd;Pnui8gF%|c35@z6C{q`Okv*}LwX#{-oi_fqFE znJ@JZOGjaLn!(jk<@Bx7VHaSkS^PHi-EOId zN-?<5MGUX`4Awde^7*4_VfgV9QpKRh`+lwpW@YWS`WW4eD6Ph#V9OElO=Ejer;nT} z%;)EK3QTlVXIbka6}U)v<|<~mc2L_v-HzK&J%G{&IlZ6}azA?>US+9|dHFZwPH5*~ zSu``EXmzGh&{b8X@Dpxt#~1{&IRnFxQ*`}<-PC357e4$-QQ2`HP!;4=~88H0W z-B*@MxZ|%KvO6J!qPcQMjtDDO@s%6o8KQJW?JPKKHy=y-m+1pd;d}Z@RrGn$#8AtZ z&|F+peBtYOdlTbsN}2r;m-npc6P|L-) zjb&rhA*v$!ZdaUW7E3)aZ%s2=5?;u8t+7pTGaen`H+2 z<9#IaA+F#3)n`v*CcyhtM`<Jl>`-6B6dxNKEs^}tOa+?-B8jC9O_kKZR-BY739@) zne045TddI#34~PZL(SBR@Nc-rm1pS%>@F22hF6z&^r68omy6I{-BAZsRcNHtkyk>< z&jv^dL|hlmi7mc5v$&jxS*A$N4wM#VDg-Px8D&JJBZy z9VXj)gj;{4l`XP^mMHYx@K2Y$LzSy8M(Lg1#lE{L+{Xz%6+;UVqIEBYlX#?q1HUo& z5`h!c3+b)nr88TG}&3K>&dEcppoVByt9biDz`4qS6 zx3uca8wcLPZe|s|MLWvpZOMP(tISVls1^;Gu?0haL5_-A(@hL8ts{4~joNGUt9wC6 zlt{-PIn-PccW)l}m+7d!o2RA@cQ~7MVLI?@xQ&){xzN@*j^l%S@0m?l(#Gv$3R+J+ zu%j;ntXw;$(x(LC{&c1H{E-`E;3Gid8;YF0n*$b>!l z><|gvsAkGzm=Jvq?OICaX3*gYsrwc{a;JQ`IZ||6&&2Pywf0$-vniCL;3cp3(<9#x zFqg%TJUrOXCHYW&n1w*-Fr2xS9-dy~Y62CHMWRH@*lf5}sEdAlt1Zo)%N_=%K%F+|DJzqqIJbUQVLt2mtw_M~t7yUEwb zihQQsaaP2Mhw1#uq6m)ui{Hgcr2^BRA;hpZB z%dd^Cx*zE36U$(Cw)nhTeyLAf{q{boFwk_)m}+V&3dnIl3g)Lsu9NVc389u>eQoSB zM2>K01Vi5o!FdTZ;7>+ymUTmgTVL&mJifSlPwoueQ_W2a=8zG8XmGcpHAkNpWvN}I z$FJ4&$h$kR#BWjMtBdp4FVpx2KBTkENtDB~lR+?TBshPwQ3@4jR?x%&JWVOmQrEhg zKrWl2QeUwF`>ijMraJp7OG~w(YXvV6FDKTWKv0A&V;j2STSzPF$nod_@_eBMt&ZZ@ zOo24I4)1jvaH0Hh%~n-(Q>ZgdF_}{(7x-I1g{#iVLw2@m6<`7Bra1C z)>|NB3p{>V=ilDmxS{HNJ>R4}-60$YV|J6euC0dEFf=@>0FQJJmBXHi*s?z~y_j$c z4_Z8Z`qOmJc)C#7-p?>8%hhONoCykJh9V9^zdLH~emS9_NN1>jlxSlbfbo_adoQuH z84(A0!U?9rBCM-DpL15C^L%?nkR1sMa}m`&-vb@VVXC-%aB%xp^5Ze^q0VitcRyJ) zQXxl5dQs}Zzu|>Cv_8Kwfw_r_v>@51d!jQhG>7Fr>r)EYLvPt&?JhpNZCl6>`nv6? z>vmHj96Hx#jT^@!b%DI*2dQ18kU6^`Xk; z4fqX9Dt6bgC-92t4AqTd0CM(~FF5(BnFKoK_Vd=}R&@*LKUVyjwBWmm0Hb!0+I*l6 zJ}{iwWM6ldm%W+Mi9P7n1^$M)u83Hwi>dVUg9(Rs{)lvzPu=->)0Ve+Sz9$ znW2if-JM=hkXBAsYvJd9gAXwEQgqp`!y2V4-7E9qCy+-syo0nHDgtXq-t%aeAi4ceB4!pmK$+HLzdL2-D z>uzJF*#P_G-W60G2;)u(wGzTpNe#s>ak(Z1LQ|)Qa0zcJT6Cl%AwHGU`}?+46#VS= zNg8%;TJP*gXO!z*g)r#*88^B@Mb%3l+RcvCdvAp*e}t62+J^m{PMg{VqGMz-rEhwj z@Z-ug7jecaSGStn!X?7$yOwmm86&9yKNuz)7^cP@5LmsIe^lCEFa3kd2zMgr3w@fF4gMG8&Jzo~-Afu)EW?gGCbj^mTjS8?hkgX&;m^gnMIS$o>qZ$%T^NcF zGY{X@l+BIi?YDf=@+wm9hENkf?$E*OnlvSV;)yWU%cb64$To5GlIDt#_NoV|KazL; z=PkO5{Snr*Qzo#rBr7lUtpcNwP6sh=dSCy-8wZljClJduG2#)v>6OQiVA6V0LRiF7ih1Jy!2R#Oq+R{?Y=9Sa7o0kbs+TO^h>+Qk}4bJ6E^h%hD_y zWJa&029#-Q8pdA{OQ*ED6EUN;UNp zHwukBBm%oGIiDq@-)!3cQe zbUr(7*SKJ6J*n4($OyjlW(qvr&?eCxjwaMb2FK=BK7cQUPl-)qLQQZAwUM&1O?(aN z9pnhx%8k>HnYb;3_NrrifA!#8*itkOM;0h6pOS|=EtzsIcd7$*(BOc!!r%EQU^Sw) zvRLziD2GFXxd?QdI|Ix-(gCUW%{Pfklph;X@b~2j$n$*GAA>r`HZ?oA5egUs5p#^0 zByM~CZ45fKfcX@LzVLu#K^WPL6`-~AY|zACbq>m~tj3qF8c<_NRxQ#dMpV0#+cU${ z_4w(XJCQSU)6N*>KNb%yc{xI8z#~vZ;$H|gvy;~Cl-7mMvg0CCFQv$Ilz;TqXOst+ zLLGvsth)tOssq~Egor_AEVtKej|v-fY%sOrYiN%=%z=U4MVwTD6A*xP(_-kHQH9tNm>&;EeGwwjn}+T&OYwmMK&;1YXNEJ&>Q9Q>q_U$qIJpa zH*SuxC!Oiv)^$I6DkiL*uhCg^--sp$Kz7zwXq}^yGmB$$!6QX#JJBXOji<6&OQXcE z25MPINA^407h~eSFYm6hsbObJa>1I#5BDQY^csDIz0bO~=AR1ii&VH$CXEP#BG&kR z4FC0004%F(sqCJM)Y{PH^jV)2oCYVAkRNW(}Rs4CUs{K$QzS*t0nm z>(s|o=uol^*Blu?c78^iyd*T=%!YSQHUB(rVZz5iE;YI#4~?7sxu%{95g~z^KsgOH0bUQU?0F#l?En+tT6pZ{MYb)uB_ z36*=|o6EymmwpFrlnkk-;;rSohtLF9%k)!;R*pI@Q}DoDpX!A7CW=L9f5*CZ^@D=0 zGt?gS`&(ap!L8#Iw?B!TtZoD}c_t2Ou{WCDrtTY|udyOQygp!j;|&ZWr1z=%jLSAR z=~ABbf+^x#r+*yF2nC9>HC*4z>9%7bB1Ake61jB?ES_31x{t4|NsK$Y-$-D8D)=xZ zk2De-J~fGl%zeQPd@9I-BiP~yxPABBA6WsL_%sD}mfHp733}q6`MsrNAr#sleki77 zd|CwS2;t))jAI%eLhw4c7H~P9-(6j|yhs_1x^*Se%XHzPis>JZ$*DPM>VN-!t6m|< zg@1ooBy|Pd{NpiA{kdr-fNDDojeAZd&~_zugK+-ZY9svQ&?heuWKeoIuImQD9X&xZ zEMj%tljWXP-^ZP@+YUKJ#WXa;e8op(5`e|oEHjrv2X4xE21yXPzwxiDWO2rmg1Nf{ z-*>`;{Mpx8g0H22;ttzw-XnynuQ#MjLS1pkAL2L^uLP;leZ-%po2ickIl4ItP9BT{ zTHb7mW(5XvFXY&8_cILcPV2-->3jv-^g>{Lx;8u;+rr$V#z*sH>h)vjq9xU4S+!rbQEJb0 zEw6?&q$H_rC76n6ghCR}1if9n`zIeSf|}Q!J4PBeUq174-|En*9$$|=M759pd6`XB zj`#i{X%oUXB|S*CWN&X$uTASnxrJA_yGf>!e8oJx0uocOsx7ABt-Iu)G)5c|s8V{_ zy}__KINZfG^aL{&29Ch6@!Yljf=L2y=#T~-8d<5VtM48Lz<8wmTpk4Se_Siar-@kb zi0zt{irxhD{W{#LyB+r+`q*u1Vv|gQZuctUp(SJKUU%RH!{mAWs@(@n$t9V@UZ$>0 zS*vPYmFCaG3wrOD=S_o}{QXUoeIhULcpEP{M~C~ac0KJWe-Llf+Ku(MK36hs=+RQk z=U(t?ROv3vGk7p<@_w%=UZCr&BHcIUP$F9*H{_sbIQm>6>1dX=4A6YfuC@IQ&%jgu zyx2?Z1RGi9Y)L!sQBZ+#G*9K1lEAQ0_w-AN#PCZ3*3k?Kd5>ljurQS`|NQ033I0^S zdG)(oOtL?g@@RjAPZ@h^DK|(@Ig-s~d}iqqnak(OI^6u+#>L=ZED zx6t%V3}XNZ&jsB7RNr@f9%ek~;mjqmuxl?&;VV7Au%fNl!Qs#GMwP9`j-XXHX*>Cy z*~zWe;<n5Dq%ChIv=-*Qq zDtljQ@~-uvsI!_RtoGAUwAI4HsezoJ-By_Apn$i5#e*7`^Q3rtDs@?ZK>Ba1k3up3 zC9Z(H7bU$P(A#gW{L;1cq3uFaLwczD`TFaIw`>ke&e+a-AT!jhW;{A+V%up6*U5sO zW4uxJF(c{ok?uw1wugzF;M1PPq$d{5Of+rG!0Gl+&ohpM^WMh+8Z;+gQaALlBL(kn zj5`>O4ab;gT#$$)M4%=h^m|7r&wDqOTKqI~7DSa}yl;4YDEbi0cY7xN8s8~4m4hMzIP&lO8$gQf)D zQCx&S7tnPP)reqi{ zv0YJP-n-Qn-8H08)~C(sV$IxafvNM<%+{w+h@u?|4y5 zJ3%V=8U#vyAu$s3MkY^>6&zlrjD19l{2&2*8mrHx4=h{clqWE5j@>_c93d(bMmI8E z+kV@XsO++JCh8k~SRuQX-Yb`J6=A_~stn?+m9wN4jvyhQ(W_oOF6;MGs>KX-q%zhF zD%RSBb5hTScB=9l$C*e0rON)NQt?2GEMwf=*{NuoFu~M!)X@z47W|aSPsF(iorol6 z{IYA9lm-Z13SE-hKld z>U-H-z(Y`iS8ar+!O3F}=Lutz{&d^4?YT4bXu0)apGt!TqvA`oPpks>h_rlrtsb(p zY$|E(09w8-18zsb$UgiU!Va>dIlL;Iazr0t03Vh1YU5wHCLeKC2S8w-?3}*f{k?qq zAABeUnk@?M=`SBSeB-PRJSBL|UZ|I;{t55CwDCW5NIO?Cr? zl$PGcv+E}p$4F>Ey9RG(7%cmRTe9xLg)13Xwd!n?)nAu^yjT z`ia*ktMdA#cIT&{Fh;4*O${>RoL{|et*k~h04eh)9!;a0eXsJ%F5depa{sx; z8rmcOd*6pWxnZGvwH3uROR|Ft3QjK-BQG0EFIy>V4_jOT5EBuV5EPLV6n&~MDk=pO zm%=S5MSxNwBHrBSPyd&Iv#X7Ro&WzVKp5Z@gcG3s+k%^oozzQP4^IbI7r+Z;RY6f9 WAS;n6xg?GcP*>4bu2FpT_J09ZJa5YY literal 0 HcmV?d00001 diff --git a/doc/linf-l2.tikz b/doc/linf-l2.tikz new file mode 100644 index 0000000..6bc330e --- /dev/null +++ b/doc/linf-l2.tikz @@ -0,0 +1,41 @@ +\def\sout{0.25} +\def\pr{0.55} + +\begin{tikzpicture}[scale=1.5,baseline={(0,0)}] + \def\sout{0.25} + + \draw (-1,-1) rectangle (1,1); + \draw [dashed] (-1-\sout,-1-\sout) rectangle (1+\sout,1+\sout); + + \node [anchor=south] at (0,-1) {Box}; + \node [anchor=north] at (0,-1-\sout) {`Stick-out' region }; + \node [anchor=north] at (0,-1.85) {\texttt{'linf'}}; + \draw [|<->|] (0,0) -- (1+\sout,0) node [pos=0.5, anchor=north] {$(1+\alpha)R$}; + + \coordinate (pc) at (-0.25, 0.9); + \fill [red] (pc) circle (1pt); + \draw [red] (pc) ++(-\pr,-\pr) rectangle ++(2*\pr,2*\pr); + \draw [red,|<->|] (pc) -- ++(0,-\pr) node [pos=0.5,anchor=east] {$r$}; + + \node [anchor=north] at ($ (pc) + (0,-\pr)$) {Particle not in box}; + +\end{tikzpicture} +\begin{tikzpicture}[scale=1.5,baseline={(0,0)}] + \def\sqrttwo{1.4145} + + \draw (-1,-1) rectangle (1,1); + \draw [dashed] (0,0) circle ((\sqrttwo+\sout*\sqrttwo); + \draw [|<->|] (0,0) -- (-1-\sout,-1-\sout) node [pos=0.3, anchor=west] {$\sqrt2(1+\alpha)R$}; + + \node [anchor=south] at (0,-1) {Box}; + \node [anchor=north] at (0,-1-\sout) {`Stick-out' region }; + \node [anchor=north] at (0,-1.85) {\texttt{'l2'}}; + + \coordinate (pc) at (-0.25, 0.9); + \fill [green] (pc) circle (1pt); + \draw [green] (pc) circle (\pr); + \draw [green,|<->|] (pc) -- ++(0,-\pr) node [pos=0.5,anchor=east] {$r$}; + + \node [anchor=north] at ($ (pc) + (0,-\pr)$) {Particle in box}; + +\end{tikzpicture} diff --git a/doc/make-images.sh b/doc/make-images.sh new file mode 100755 index 0000000..2824805 --- /dev/null +++ b/doc/make-images.sh @@ -0,0 +1,3 @@ +#! /bin/bash + +tikz2png linf-l2.tikz images/linf-l2.png -density 200 diff --git a/doc/tikz2png b/doc/tikz2png new file mode 100755 index 0000000..022897e --- /dev/null +++ b/doc/tikz2png @@ -0,0 +1,57 @@ +#! /bin/bash + +set -e +set -x + +if test "$1" = "" || test "$2" = ""; then + echo "Usage: $0 pic.tikz pic.png [CONVERT FLAGS ...]" + exit 1 +fi + +TIKZ="$1" +OUTF="$2" +shift +shift + +TMPDIR=$(mktemp -d) +TEX="$TMPDIR/source.tex" + +cat < $TEX +\documentclass[preview]{standalone} +\nonstopmode +\usepackage{tikz} +\usetikzlibrary{calc} +\usetikzlibrary{positioning} +\usetikzlibrary{fadings} +\usetikzlibrary{chains} +\usetikzlibrary{scopes} +\usetikzlibrary{shadows} +\usetikzlibrary{arrows} +\usetikzlibrary{snakes} +\usetikzlibrary{shapes.misc} +\usetikzlibrary{shapes.symbols} +\usetikzlibrary{shapes.multipart} +\usetikzlibrary{fit} +\usetikzlibrary{shapes.arrows} +\usetikzlibrary{shapes.geometric} +\usetikzlibrary{shapes.callouts} +\usetikzlibrary{decorations.text} + +\pgfdeclarelayer{background} +\pgfdeclarelayer{foreground} +\pgfsetlayers{background,main,foreground} + +\renewcommand*\familydefault{\sfdefault} + +\begin{document} +END + +cat $TIKZ >> $TEX + +cat <> $TEX +\end{document} +END + +(cd "$TMPDIR"; pdflatex source) +(cd "$TMPDIR"; convert "$@" source.pdf source.png) +cp "$TMPDIR/source.png" "$OUTF" -- GitLab From 558583e350ad1d18b41c08b6ad45b39627c6bdc2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Sep 2017 17:32:34 -0500 Subject: [PATCH 02/18] Test, fix l2 stickout check --- boxtree/tree_build_kernels.py | 16 ++++++----- test/test_tree.py | 51 ++++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index 5970b06..b9e8528 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -355,14 +355,14 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// * (${ax}_bits + one_half) * next_level_box_size_factor; + %endfor + + %if srcntgts_extent_norm == "linf": const coord_t next_level_box_stick_out_radius_${ax} = box_radius_factor * global_extent_${ax} * next_level_box_size_factor; - %endfor - - %if srcntgts_extent_norm == "linf": %for ax in axis_names: // stop descent here if particle sticks out of next-level box stop_srcntgt_descent = stop_srcntgt_descent || @@ -382,17 +382,19 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// * global_extent_x /* assume isotropy */ * next_level_box_size_factor; - coord_t next_level_box_center_to_srcntgt_bdry_l2_dist_squared = 0 + coord_t next_level_box_center_to_srcntgt_bdry_l2_dist = + sqrt( %for ax in axis_names: + (srcntgt_${ax} - next_level_box_center_${ax}) * (srcntgt_${ax} - next_level_box_center_${ax}) %endfor - + srcntgt_radius*srcntgt_radius - ; + ) + srcntgt_radius; // stop descent here if particle sticks out of next-level box stop_srcntgt_descent = stop_srcntgt_descent || - (next_level_box_center_to_srcntgt_bdry_l2_dist_squared + ( + next_level_box_center_to_srcntgt_bdry_l2_dist + * next_level_box_center_to_srcntgt_bdry_l2_dist >= ${dimensions} * next_level_box_stick_out_radius * next_level_box_stick_out_radius); diff --git a/test/test_tree.py b/test/test_tree.py index dff309f..f68a885 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -444,7 +444,8 @@ def test_source_target_tree(ctx_getter, dims, do_plot=False): @pytest.mark.opencl @pytest.mark.parametrize("dims", [2, 3]) -def test_extent_tree(ctx_getter, dims, do_plot=False): +@pytest.mark.parametrize("extent_norm", ["linf", "l2"]) +def test_extent_tree(ctx_getter, dims, extent_norm, do_plot=False): logging.basicConfig(level=logging.INFO) ctx = ctx_getter() @@ -477,6 +478,7 @@ def test_extent_tree(ctx_getter, dims, do_plot=False): dev_tree, _ = tb(queue, sources, targets=targets, source_radii=source_radii, target_radii=target_radii, + extent_norm=extent_norm, refine_weights=refine_weights, max_leaf_refine_weight=20, @@ -555,9 +557,6 @@ def test_extent_tree(ctx_getter, dims, do_plot=False): for ibox in range(tree.nboxes): extent_low, extent_high = tree.get_box_extent(ibox) - box_radius = np.max(extent_high-extent_low) * 0.5 - stick_out_dist = tree.stick_out_factor * box_radius - assert (extent_low >= tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox assert (extent_high <= @@ -573,6 +572,19 @@ def test_extent_tree(ctx_getter, dims, do_plot=False): + np.sum(tree.box_target_counts_cumul[existing_children]) == tree.box_target_counts_cumul[ibox]) + del existing_children + del box_children + + for ibox in range(tree.nboxes): + lev = int(tree.box_levels[ibox]) + box_radius = 0.5 * tree.root_extent / (1 << lev) + box_center = tree.box_centers[:, ibox] + extent_low = box_center - box_radius + extent_high = box_center + box_radius + + stick_out_dist = tree.stick_out_factor * box_radius + radius_with_stickout = (1 + tree.stick_out_factor) * box_radius + for what, starts, counts, points, radii in [ ("source", tree.box_source_starts, tree.box_source_counts_cumul, sorted_sources, sorted_source_radii), @@ -584,18 +596,33 @@ def test_extent_tree(ctx_getter, dims, do_plot=False): check_particles = points[:, bslice] check_radii = radii[bslice] - good = ( - (check_particles + check_radii - < extent_high[:, np.newaxis] + stick_out_dist) - & - (extent_low[:, np.newaxis] - stick_out_dist - <= check_particles - check_radii) - ).all(axis=0) + if extent_norm == "linf": + good = ( + (check_particles + check_radii + < extent_high[:, np.newaxis] + stick_out_dist) + & + (extent_low[:, np.newaxis] - stick_out_dist + <= check_particles - check_radii) + ).all(axis=0) + + elif extent_norm == "l2": + center_dists = np.sqrt( + np.sum( + (check_particles - box_center.reshape(-1, 1))**2, + axis=0)) + + good = ( + (center_dists + check_radii)**2 + < dims * radius_with_stickout**2) + + else: + raise ValueError("unexpected value of extent_norm") all_good_here = good.all() if not all_good_here: - print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox])) + print("BAD BOX %s %d level %d" + % (what, ibox, tree.box_levels[ibox])) all_good_so_far = all_good_so_far and all_good_here assert all_good_here -- GitLab From 75c8cafeabf28e921448d296738acc91f49c8724 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 18 Sep 2017 17:47:17 -0500 Subject: [PATCH 03/18] Fix use of next_level_box_stick_out_radius_{ax} in l2 stick-out check --- boxtree/tree_build_kernels.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index b9e8528..5c9bfcb 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -358,12 +358,12 @@ MORTON_NR_SCAN_PREAMBLE_TPL = Template(r"""//CL// %endfor %if srcntgts_extent_norm == "linf": - const coord_t next_level_box_stick_out_radius_${ax} = - box_radius_factor - * global_extent_${ax} - * next_level_box_size_factor; - %for ax in axis_names: + const coord_t next_level_box_stick_out_radius_${ax} = + box_radius_factor + * global_extent_${ax} + * next_level_box_size_factor; + // stop descent here if particle sticks out of next-level box stop_srcntgt_descent = stop_srcntgt_descent || (srcntgt_${ax} + srcntgt_radius >= -- GitLab From f82ac38fd73101346879953d36d3ddc6293acc87 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Sep 2017 13:50:42 -0500 Subject: [PATCH 04/18] Add, test true box extent finder --- boxtree/traversal.py | 230 ++++++++++++++++++++++++++++++++++++++++- test/test_traversal.py | 24 +++++ 2 files changed, 253 insertions(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 0bb5a31..1461209 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -298,6 +298,98 @@ LEVEL_START_BOX_NR_EXTRACTOR_TEMPLATE = ElementwiseTemplate( # }}} +# {{{ box extents + +BOX_EXTENTS_FINDER_TEMPLATE = ElementwiseTemplate( + arguments="""//CL:mako// + box_id_t aligned_nboxes, + box_id_t *box_child_ids, + coord_t *box_centers, + particle_id_t *box_particle_starts, + particle_id_t *box_particle_counts_nonchild + + %for iaxis in range(dimensions): + , const coord_t *particle_${AXIS_NAMES[iaxis]} + %endfor + , + const coord_t *particle_radii, + int enable_radii, + + coord_t *box_particle_bounding_box_min, + coord_t *box_particle_bounding_box_max, + """, + + operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// + box_id_t ibox = i; + + ${load_center("box_center", "ibox")} + + <% axis_names = AXIS_NAMES[:dimensions] %> + + // incorporate own particles + %for iaxis, ax in enumerate(axis_names): + coord_t min_particle_${ax} = box_center.s${iaxis}; + coord_t max_particle_${ax} = box_center.s${iaxis}; + %endfor + + particle_id_t start = box_particle_starts[ibox]; + particle_id_t stop = start + box_particle_counts_nonchild[ibox]; + + for (particle_id_t iparticle = start; iparticle < stop; ++iparticle) + { + coord_t particle_rad = 0; + %if sources_have_extent or targets_have_extent: + // If only one has extent, then the radius array for the other + // may well be a null pointer. + if (enable_radii) + particle_rad = particle_radii[iparticle]; + %endif + + %for iaxis, ax in enumerate(axis_names): + coord_t particle_coord_${ax} = particle_${ax}[iparticle]; + + min_particle_${ax} = min( + min_particle_${ax}, + particle_coord_${ax} - particle_rad); + max_particle_${ax} = max( + max_particle_${ax}, + particle_coord_${ax} + particle_rad); + %endfor + } + + // incorporate child boxes + for (int morton_nr = 0; morton_nr < ${2**dimensions}; ++morton_nr) + { + box_id_t child_id = box_child_ids[ + morton_nr * aligned_nboxes + ibox]; + + if (child_id == 0) + continue; + + %for iaxis, ax in enumerate(axis_names): + min_particle_${ax} = min( + min_particle_${ax}, + box_particle_bounding_box_min[ + ${iaxis} * aligned_nboxes + child_id]); + max_particle_${ax} = max( + max_particle_${ax}, + box_particle_bounding_box_max[ + ${iaxis} * aligned_nboxes + child_id]); + %endfor + } + + // write result + %for iaxis, ax in enumerate(axis_names): + box_particle_bounding_box_min[ + ${iaxis} * aligned_nboxes + ibox] = min_particle_${ax}; + box_particle_bounding_box_max[ + ${iaxis} * aligned_nboxes + ibox] = max_particle_${ax}; + %endfor + """, + name="find_box_extents") + +# }}} + # {{{ same-level non-well-separated boxes (generalization of "colleagues") SAME_LEVEL_NON_WELL_SEP_BOXES_TEMPLATE = r"""//CL// @@ -485,6 +577,9 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) box_id_t sib_box_id = box_child_ids[ morton_nr * aligned_nboxes + parent_nf]; + if (sib_box_id == 0) + continue; + ${load_center("sib_center", "sib_box_id")} bool sep = !is_adjacent_or_overlapping_with_neighborhood( @@ -1013,6 +1108,32 @@ class FMMTraversalInfo(DeviceDataRecord): Indices into :attr:`target_or_target_parent_boxes` indicating where each level starts and ends. + .. ------------------------------------------------------------------------ + .. rubric:: Box extents + .. ------------------------------------------------------------------------ + + The attributes in this section are only available if the respective + particle type (source/target) has extents and if :attr:`Tree.extent_norm` + is ``"linf"``. + + If they are not available, the corresponding attributes will be *None*. + + .. attribute:: box_source_bounding_box_min + + ``coordt_t [dimensions, aligned_nboxes]`` + + .. attribute:: box_source_bounding_box_max + + ``coordt_t [dimensions, aligned_nboxes]`` + + .. attribute:: box_target_bounding_box_min + + ``coordt_t [dimensions, aligned_nboxes]`` + + .. attribute:: box_target_bounding_box_max + + ``coordt_t [dimensions, aligned_nboxes]`` + .. ------------------------------------------------------------------------ .. rubric:: Same-level non-well-separated boxes .. ------------------------------------------------------------------------ @@ -1371,6 +1492,23 @@ class FMMTraversalBuilder: debug=debug, name_prefix="sources_parents_and_targets") + result["box_extents_finder"] = \ + BOX_EXTENTS_FINDER_TEMPLATE.build(self.context, + type_aliases=( + ("box_id_t", box_id_dtype), + ("coord_t", coord_dtype), + ("coord_vec_t", cl.cltypes.vec_types[ + coord_dtype, dimensions]), + ("particle_id_t", particle_id_dtype), + ), + var_values=( + ("dimensions", dimensions), + ("AXIS_NAMES", AXIS_NAMES), + ("sources_have_extent", sources_have_extent), + ("targets_have_extent", targets_have_extent), + ), + ) + result["level_start_box_nrs_extractor"] = \ LEVEL_START_BOX_NR_EXTRACTOR_TEMPLATE.build(self.context, type_aliases=( @@ -1567,7 +1705,92 @@ class FMMTraversalBuilder: # }}} - # {{{ same-level near-field + # {{{ box extents + + fin_debug("finding box extents") + + box_source_bounding_box_min = cl.array.empty( + queue, (tree.dimensions, tree.aligned_nboxes), + dtype=tree.coord_dtype) + box_source_bounding_box_max = cl.array.empty( + queue, (tree.dimensions, tree.aligned_nboxes), + dtype=tree.coord_dtype) + + if tree.sources_are_targets: + box_target_bounding_box_min = cl.array.empty( + queue, (tree.dimensions, tree.aligned_nboxes), + dtype=tree.coord_dtype) + box_target_bounding_box_max = cl.array.empty( + queue, (tree.dimensions, tree.aligned_nboxes), + dtype=tree.coord_dtype) + + else: + box_target_bounding_box_min = box_source_bounding_box_min + box_target_bounding_box_max = box_source_bounding_box_max + + bogus_radii_array = cl.array.empty(queue, 1, dtype=tree.coord_dtype) + + # nlevels-1 is the highest valid level index + for level in range(tree.nlevels-1, -1, -1): + start, stop = tree.level_start_box_nrs[level:level+2] + + for (skip, enable_radii, bbox_min, bbox_max, + pstarts, pcounts, radii_tree_attr, particles) in [ + ( + # never skip + False, + + tree.sources_have_extent, + box_source_bounding_box_min, + box_source_bounding_box_max, + tree.box_source_starts, + tree.box_source_counts_nonchild, + "source_radii", + tree.sources), + ( + # skip the 'target' round if sources and targets + # are the same. + tree.sources_are_targets, + + tree.targets_have_extent, + box_target_bounding_box_min, + box_target_bounding_box_max, + tree.box_target_starts, + tree.box_target_counts_nonchild, + "target_radii", + tree.targets), + ]: + + if skip: + continue + + args = ( + ( + tree.aligned_nboxes, + tree.box_child_ids, + tree.box_centers, + pstarts, pcounts,) + + tuple(particles) + + ( + getattr(tree, radii_tree_attr, bogus_radii_array), + enable_radii, + + bbox_min, + bbox_max)) + + evt = knl_info.box_extents_finder( + *args, + + range=slice(start, stop), + queue=queue, wait_for=wait_for) + + wait_for = [evt] + + del bogus_radii_array + + # }}} + + # {{{ same-level non-well-separated boxes # If well_sep_is_n_away is 1, this agrees with the definition of # 'colleagues' from the classical FMM literature. @@ -1718,6 +1941,11 @@ class FMMTraversalBuilder: level_start_target_or_target_parent_box_nrs=( level_start_target_or_target_parent_box_nrs), + box_source_bounding_box_min=box_source_bounding_box_min, + box_source_bounding_box_max=box_source_bounding_box_max, + box_target_bounding_box_min=box_target_bounding_box_min, + box_target_bounding_box_max=box_target_bounding_box_max, + same_level_non_well_sep_boxes_starts=( same_level_non_well_sep_boxes.starts), same_level_non_well_sep_boxes_lists=( diff --git a/test/test_traversal.py b/test/test_traversal.py index 5f15131..6c4096e 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -249,6 +249,30 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets): # }}} + # {{{ box extents make sense + + for ibox in range(tree.nboxes): + ext_low, ext_high = tree.get_box_extent(ibox) + center = tree.box_centers[:, ibox] + + for which, bbox_min, bbox_max in [ + ( + "source", + trav.box_source_bounding_box_min[:, ibox], + trav.box_source_bounding_box_max[:, ibox]), + ( + "target", + trav.box_target_bounding_box_min[:, ibox], + trav.box_target_bounding_box_max[:, ibox]), + ]: + assert (ext_low <= bbox_min).all() + assert (bbox_min <= center).all() + + assert (bbox_max <= ext_high).all() + assert (center <= bbox_max).all() + + # }}} + # }}} -- GitLab From 924c111ddc671edd3b3299d80b9d377bdf758a43 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Sep 2017 14:59:38 -0500 Subject: [PATCH 05/18] Fix sources-are-targets handling of box extent finder --- boxtree/traversal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 1461209..13423f5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1717,6 +1717,9 @@ class FMMTraversalBuilder: dtype=tree.coord_dtype) if tree.sources_are_targets: + box_target_bounding_box_min = box_source_bounding_box_min + box_target_bounding_box_max = box_source_bounding_box_max + else: box_target_bounding_box_min = cl.array.empty( queue, (tree.dimensions, tree.aligned_nboxes), dtype=tree.coord_dtype) @@ -1724,10 +1727,6 @@ class FMMTraversalBuilder: queue, (tree.dimensions, tree.aligned_nboxes), dtype=tree.coord_dtype) - else: - box_target_bounding_box_min = box_source_bounding_box_min - box_target_bounding_box_max = box_source_bounding_box_max - bogus_radii_array = cl.array.empty(queue, 1, dtype=tree.coord_dtype) # nlevels-1 is the highest valid level index -- GitLab From 2cd910d8b62755e069d14515cc76fd1c19884b4c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 26 Sep 2017 19:56:33 -0500 Subject: [PATCH 06/18] List 4 build: some variable name improvements --- boxtree/traversal.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 13423f5..0975c15 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -851,9 +851,9 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (tgt_box_level == 0) return; - box_id_t parent_box_id = box_parent_ids[tgt_ibox]; - const int parent_level = tgt_box_level - 1; - ${load_center("parent_center", "parent_box_id")} + box_id_t tgt_parent_box_id = box_parent_ids[tgt_ibox]; + const int tgt_parent_level = tgt_box_level - 1; + ${load_center("parent_center", "tgt_parent_box_id")} box_flags_t tgt_box_flags = box_flags[tgt_ibox]; @@ -863,13 +863,13 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) // may directly jump to the parent level. int walk_level = tgt_box_level - 1; - box_id_t current_parent_box_id = parent_box_id; + box_id_t current_tgt_parent_box_id = tgt_parent_box_id; %else: // In a 2+-away FMM, tgt_ibox's same-level non-well-separated boxes *may* // be sufficiently separated from tgt_ibox to be in its list 4. int walk_level = tgt_box_level; - box_id_t current_parent_box_id = tgt_ibox; + box_id_t current_tgt_parent_box_id = tgt_ibox; %endif /* @@ -883,14 +883,14 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) for (; walk_level != 0; // {{{ advance --walk_level, - current_parent_box_id = box_parent_ids[current_parent_box_id] + current_tgt_parent_box_id = box_parent_ids[current_tgt_parent_box_id] // }}} ) { box_id_t slnws_start = - same_level_non_well_sep_boxes_starts[current_parent_box_id]; + same_level_non_well_sep_boxes_starts[current_tgt_parent_box_id]; box_id_t slnws_stop = - same_level_non_well_sep_boxes_starts[current_parent_box_id+1]; + same_level_non_well_sep_boxes_starts[current_tgt_parent_box_id+1]; // /!\ i is not a box id, it's an index into // same_level_non_well_sep_boxes_lists. @@ -939,7 +939,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) { bool in_parent_list_1 = is_adjacent_or_overlapping(root_extent, - parent_center, parent_level, + parent_center, tgt_parent_level, slnws_center, walk_level); bool would_be_in_parent_list_4_not_considering_stickout = ( @@ -987,7 +987,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) %if sources_have_extent or targets_have_extent: const bool parent_meets_with_ext_sep_criterion = meets_sep_bigger_criterion(root_extent, - parent_center, parent_level, + parent_center, tgt_parent_level, slnws_center, walk_level, stick_out_factor); -- GitLab From cf8f90d62ecd181484f42ae4abe88628a0e8498e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 1 Oct 2017 22:47:49 -0500 Subject: [PATCH 07/18] Allow adjusting list 3 far criterion, implement static_linf and precise_linf criteria --- boxtree/traversal.py | 236 +++++++++++++++++++++++++++++++------------ test/test_fmm.py | 21 +++- 2 files changed, 190 insertions(+), 67 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index e5c22aa..bbf8da0 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -105,30 +105,61 @@ TRAVERSAL_PREAMBLE_MAKO_DEFS = r"""//CL:mako// <%def name="load_center(name, box_id, declare=True)"> %if declare: - coord_vec_t ${name}; + coord_vec_t ${name} = (coord_vec_t)( + %else: + ${name} = (coord_vec_t)( %endif - %for i in range(dimensions): - ${name}.${AXIS_NAMES[i]} = box_centers[aligned_nboxes * ${i} + ${box_id}]; - %endfor + %for i in range(dimensions): + box_centers[aligned_nboxes * ${i} + ${box_id}] + %if i + 1 < dimensions: + , + %endif + %endfor + ); -<%def name="check_l_infty_ball_overlap( - is_overlapping, box_id, ball_radius, ball_center)"> - { - ${load_center("box_center", box_id)} - int box_level = box_levels[${box_id}]; - - coord_t size_sum = LEVEL_TO_RAD(box_level) + ${ball_radius}; +<%def name="load_true_box_extent(name, box_id, kind, declare=True)"> + %if declare: + coord_vec_t ${name}_ext_center, ${name}_radii_vec; + %endif - coord_t max_dist = 0; - %for i in range(dimensions): - max_dist = fmax(max_dist, - fabs(${ball_center}.s${i} - box_center.s${i})); + { + %for bound in ["min", "max"]: + coord_vec_t ${name}_${bound} = (coord_vec_t)( + %for iaxis in range(dimensions): + box_${kind}_bounding_box_${bound}[ + ${iaxis} * aligned_nboxes + ${box_id}] + %if iaxis + 1 < dimensions: + , + %endif + %endfor + ); %endfor - ${is_overlapping} = max_dist <= size_sum; + ${name}_ext_center = 0.5*(${name}_min + ${name}_max); + ${name}_radii_vec = 0.5*(${name}_max - ${name}_min); } + +<%def name="load_true_box_linf_radius(name, center, box_id, kind, declare=True)"> + %if declare: + coord_t ${name}; + %endif + + ${name} = 0; + + %for iaxis in range(dimensions): + %for bound in ["min", "max"]: + ${name} = fmax( + ${name}, + fabs( + ${center}.${AXIS_NAMES[iaxis]} + - box_${kind}_bounding_box_${bound}[ + ${iaxis} * aligned_nboxes + ${box_id}])); + %endfor + %endfor + + """ @@ -603,49 +634,37 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) FROM_SEP_SMALLER_TEMPLATE = r"""//CL// -inline bool meets_sep_smaller_criterion( - coord_t root_extent, - coord_vec_t target_center, int target_level, - coord_vec_t source_center, int source_level, - coord_t stick_out_factor) -{ - coord_t target_rad = LEVEL_TO_RAD(target_level); - coord_t source_rad = LEVEL_TO_RAD(source_level); - coord_t min_allowed_center_l_inf_dist = ( - 3 * source_rad - + (1 + stick_out_factor) * target_rad); - - coord_t l_inf_dist = 0; - %for i in range(dimensions): - l_inf_dist = fmax( - l_inf_dist, - fabs(target_center.s${i} - source_center.s${i})); - %endfor - - return l_inf_dist >= min_allowed_center_l_inf_dist * (1 - 8 * COORD_T_MACH_EPS); -} - - void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) { // /!\ target_box_number is *not* a box_id, despite the type. // It's the number of the target box we're currently processing. - box_id_t box_id = target_boxes[target_box_number]; + box_id_t tgt_box_id = target_boxes[target_box_number]; - ${load_center("center", "box_id")} + ${load_center("tgt_center", "tgt_box_id")} - int level = box_levels[box_id]; + int tgt_level = box_levels[tgt_box_id]; + + %if targets_have_extent: + %if from_sep_smaller_crit == "static_linf": + coord_t tgt_stickout_rad = + (1 + stick_out_factor) * LEVEL_TO_RAD(tgt_level); - box_id_t slnws_start = same_level_non_well_sep_boxes_starts[box_id]; - box_id_t slnws_stop = same_level_non_well_sep_boxes_starts[box_id+1]; + %elif from_sep_smaller_crit == "precise_linf": + ${load_true_box_extent("tgt", "tgt_box_id", "target")} + + %endif + %endif + + box_id_t slnws_start = same_level_non_well_sep_boxes_starts[tgt_box_id]; + box_id_t slnws_stop = same_level_non_well_sep_boxes_starts[tgt_box_id+1]; // /!\ i is not a box_id, it's an index into same_level_non_well_sep_boxes_lists. for (box_id_t i = slnws_start; i < slnws_stop; ++i) { box_id_t same_lev_nws_box = same_level_non_well_sep_boxes_lists[i]; - if (same_lev_nws_box == box_id) + if (same_lev_nws_box == tgt_box_id) continue; // Colleagues (same-level NWS boxes) for 1-away are always adjacent, so @@ -658,21 +677,21 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) while (continue_walk) { // Loop invariant: - // walk_parent_box_id is, at first, always adjacent to box_id. + // walk_parent_box_id is, at first, always adjacent to tgt_box_id. // // This is true at the first level because colleagues are adjacent // by definition, and is kept true throughout the walk by only descending // into adjacent boxes. // // As we descend, we may find a child of an adjacent box that is - // non-adjacent to box_id. + // non-adjacent to tgt_box_id. // // If neither sources nor targets have extent, then that - // nonadjacent child box is added to box_id's from_sep_smaller ("list 3 - // far") and that's it. + // nonadjacent child box is added to tgt_box_id's from_sep_smaller + // ("list 3far") and that's it. // // If they have extent, then while they may be separated, the - // intersection of box_id's and the child box's stick-out region + // intersection of tgt_box_id's and the child box's stick-out region // may be non-empty, and we thus need to add that child to // from_sep_close_smaller ("list 3 close") for the interaction to be // done by direct evaluation. We also need to descend into that @@ -694,7 +713,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) int walk_level = box_levels[walk_box_id]; bool in_list_1 = is_adjacent_or_overlapping(root_extent, - center, level, walk_center, walk_level); + tgt_center, tgt_level, walk_center, walk_level); if (in_list_1) { @@ -714,21 +733,62 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) } else { - %if sources_have_extent or targets_have_extent: - const bool meets_crit = - meets_sep_smaller_criterion(root_extent, - center, level, - walk_center, walk_level, - stick_out_factor); + bool meets_sep_crit; + + <% assert not sources_have_extent %> + + %if not targets_have_extent: + meets_sep_crit = true; + + %elif from_sep_smaller_crit == "static_linf": + { + coord_t source_rad = LEVEL_TO_RAD(walk_level); + + // l-infty distance between source box and target box. + // Negative indicates overlap. + coord_t l_inf_dist = 0; + %for i in range(dimensions): + l_inf_dist = fmax( + l_inf_dist, + fabs(tgt_center.s${i} - walk_center.s${i}) + - tgt_stickout_rad + - source_rad); + %endfor + + meets_sep_crit = l_inf_dist >= + (2 - 8 * COORD_T_MACH_EPS) * source_rad; + } + + %elif from_sep_smaller_crit == "precise_linf": + { + coord_t source_rad = LEVEL_TO_RAD(walk_level); + + // l-infty distance between source box and target box. + // Negative indicates overlap. + coord_t l_inf_dist = 0; + %for i in range(dimensions): + l_inf_dist = fmax( + l_inf_dist, + fabs(tgt_ext_center.s${i} - walk_center.s${i}) + - tgt_radii_vec.s${i} + - source_rad); + %endfor + + meets_sep_crit = l_inf_dist >= + (2 - 8 * COORD_T_MACH_EPS) * source_rad; + } + %else: - const bool meets_crit = true; + <% raise ValueError( + "unknown value of from_sep_smaller_crit: %s" + % from_sep_smaller_crit) %> %endif // We're no longer *immediately* adjacent to our target // box, but our stick-out regions might still have a // non-empty intersection. - if (meets_crit) + if (meets_sep_crit) { if (from_sep_smaller_source_level == walk_level) APPEND_from_sep_smaller(walk_box_id); @@ -1429,16 +1489,62 @@ class _KernelInfo(Record): class FMMTraversalBuilder: - def __init__(self, context, well_sep_is_n_away=1): + def __init__(self, context, well_sep_is_n_away=1, from_sep_smaller_crit=None): self.context = context self.well_sep_is_n_away = well_sep_is_n_away + self.from_sep_smaller_crit = from_sep_smaller_crit # {{{ kernel builder @memoize_method def get_kernel_info(self, dimensions, particle_id_dtype, box_id_dtype, coord_dtype, box_level_dtype, max_levels, - sources_are_targets, sources_have_extent, targets_have_extent): + sources_are_targets, sources_have_extent, targets_have_extent, + extent_norm): + + # {{{ process from_sep_smaller_crit + + from_sep_smaller_crit = self.from_sep_smaller_crit + + # FIXME Adjust these defaults once it's known what they should be. + if extent_norm == "linf": + if from_sep_smaller_crit is None: + # same as before + from_sep_smaller_crit = "static_linf" + + elif extent_norm == "l2": + if from_sep_smaller_crit is None: + raise NotImplementedError( + "'from_sep_smaller_crit' was not specified. " + "Not specifying it should substitute in a " + "reasonable default. " + "Unfortunately, a reasonable default is not yet " + "known in the l^2 extent norms case.") + + if from_sep_smaller_crit == "static_linf": + raise ValueError( + "The static l^inf from-sep-smaller criterion " + "cannot be used with the l^2 extent norm") + + elif extent_norm is None: + assert not (sources_have_extent or targets_have_extent) + + if from_sep_smaller_crit is None: + # doesn't matter + from_sep_smaller_crit = "static_linf" + + else: + raise ValueError("unexpected value of 'extent_norm': %s" + % extent_norm) + + if from_sep_smaller_crit not in [ + "static_linf", "precise_linf", + "static_l2", "precise_l2", + ]: + raise ValueError("unexpected value of 'from_sep_smaller_crit': %s" + % from_sep_smaller_crit) + + # }}} logger.info("traversal build kernels: start build") @@ -1462,6 +1568,7 @@ class FMMTraversalBuilder: sources_have_extent=sources_have_extent, targets_have_extent=targets_have_extent, well_sep_is_n_away=self.well_sep_is_n_away, + from_sep_smaller_crit=from_sep_smaller_crit, ) from pyopencl.algorithm import ListOfListsBuilder from pyopencl.tools import VectorArg, ScalarArg @@ -1554,6 +1661,8 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), + VectorArg(coord_dtype, "box_target_bounding_box_min"), + VectorArg(coord_dtype, "box_target_bounding_box_max"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], ["from_sep_close_smaller"] @@ -1628,7 +1737,8 @@ class FMMTraversalBuilder: tree.dimensions, tree.particle_id_dtype, tree.box_id_dtype, tree.coord_dtype, tree.box_level_dtype, max_levels, tree.sources_are_targets, - tree.sources_have_extent, tree.targets_have_extent) + tree.sources_have_extent, tree.targets_have_extent, + tree.extent_norm) def fin_debug(s): if debug: @@ -1851,6 +1961,8 @@ class FMMTraversalBuilder: tree.stick_out_factor, target_boxes.data, same_level_non_well_sep_boxes.starts.data, same_level_non_well_sep_boxes.lists.data, + box_target_bounding_box_min.data, + box_target_bounding_box_max.data, ) from_sep_smaller_wait_for = [] diff --git a/test/test_fmm.py b/test/test_fmm.py index a32fda8..d3b61cc 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -240,6 +240,14 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( @pytest.mark.parametrize("well_sep_is_n_away", [1, 2]) +@pytest.mark.parametrize(("extent_norm", "from_sep_smaller_crit"), [ + ("linf", "static_linf"), + ("linf", "precise_linf"), + ("l2", "precise_linf"), + # NYI + #("l2", "static_l2"), + #("l2", "precise_l2"), + ]) @pytest.mark.parametrize(("dims", "nsources_req", "ntargets_req", "who_has_extent", "source_gen", "target_gen", "filter_kind"), [ @@ -261,18 +269,19 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "user"), #(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "user"), #(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "user"), - (2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user"), + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user"), #(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "user"), (2, 10**5, None, "", p_normal, p_normal, "tree"), (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "tree"), #(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "tree"), #(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "tree"), - (2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree"), + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree"), #(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "tree"), ]) def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, - who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away): + who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away, + extent_norm, from_sep_smaller_crit): """Tests whether the built FMM traversal structures and driver completely capture all interactions. """ @@ -322,14 +331,16 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, source_radii=source_radii, target_radii=target_radii, - debug=True, stick_out_factor=0.25) + debug=True, stick_out_factor=0.25, extent_norm=extent_norm) if 0: tree.get().plot() import matplotlib.pyplot as pt pt.show() from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) + tbuild = FMMTraversalBuilder(ctx, + well_sep_is_n_away=well_sep_is_n_away, + from_sep_smaller_crit=from_sep_smaller_crit) trav, _ = tbuild(queue, tree, debug=True) if who_has_extent: -- GitLab From 5e473154e56ea40224a0f7f9ceaca37de32856d4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 1 Oct 2017 22:54:48 -0500 Subject: [PATCH 08/18] Add back check_l_infty_ball_overlap (removed without realizing it was used in the area query) --- boxtree/traversal.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index bbf8da0..b9366ce 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -160,6 +160,20 @@ TRAVERSAL_PREAMBLE_MAKO_DEFS = r"""//CL:mako// %endfor +<%def name="check_l_infty_ball_overlap( + is_overlapping, box_id, ball_radius, ball_center)"> + { + ${load_center("box_center", box_id)} + int box_level = box_levels[${box_id}]; + coord_t size_sum = LEVEL_TO_RAD(box_level) + ${ball_radius}; + coord_t max_dist = 0; + %for i in range(dimensions): + max_dist = fmax(max_dist, + fabs(${ball_center}.s${i} - box_center.s${i})); + %endfor + ${is_overlapping} = max_dist <= size_sum; + } + """ -- GitLab From 92ba4f62acbfd0333865ba28581cda2e6aed9c30 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 1 Oct 2017 22:56:20 -0500 Subject: [PATCH 09/18] Tree build: extent_norm: use None for default value --- boxtree/tree_build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index bef9751..2566dc4 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -78,7 +78,7 @@ class TreeBuilder(object): targets=None, source_radii=None, target_radii=None, stick_out_factor=None, refine_weights=None, max_leaf_refine_weight=None, wait_for=None, - extent_norm="linf", + extent_norm=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -145,6 +145,9 @@ class TreeBuilder(object): sources_have_extent = source_radii is not None targets_have_extent = target_radii is not None + if extent_norm is None: + extent_norm = "linf" + if extent_norm not in ["linf", "l2"]: raise ValueError("unexpected value of 'extent_norm': %s" % extent_norm) -- GitLab From 18cf7e7fcb423b94d52e1b82ce8f779aec62bf36 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 2 Oct 2017 12:46:15 -0500 Subject: [PATCH 10/18] Implement l2 List 3 far separation criteria --- boxtree/traversal.py | 108 +++++++++++++++++++++++++++++++++---------- test/test_fmm.py | 5 +- 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index b9366ce..f6b3239 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -141,25 +141,6 @@ TRAVERSAL_PREAMBLE_MAKO_DEFS = r"""//CL:mako// } -<%def name="load_true_box_linf_radius(name, center, box_id, kind, declare=True)"> - %if declare: - coord_t ${name}; - %endif - - ${name} = 0; - - %for iaxis in range(dimensions): - %for bound in ["min", "max"]: - ${name} = fmax( - ${name}, - fabs( - ${center}.${AXIS_NAMES[iaxis]} - - box_${kind}_bounding_box_${bound}[ - ${iaxis} * aligned_nboxes + ${box_id}])); - %endfor - %endfor - - <%def name="check_l_infty_ball_overlap( is_overlapping, box_id, ball_radius, ball_center)"> { @@ -202,6 +183,8 @@ typedef ${dtype_to_ctype(vec_types_dict[coord_dtype, dimensions])} coord_vec_t; %else: #define dbg_printf(ARGS) /* */ %endif + +#define square(x) ((x)*(x)) """ @@ -660,13 +643,35 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) int tgt_level = box_levels[tgt_box_id]; %if targets_have_extent: - %if from_sep_smaller_crit == "static_linf": - coord_t tgt_stickout_rad = + %if from_sep_smaller_crit in ["static_linf", "static_l2"]: + coord_t tgt_stickout_l_inf_rad = (1 + stick_out_factor) * LEVEL_TO_RAD(tgt_level); %elif from_sep_smaller_crit == "precise_linf": ${load_true_box_extent("tgt", "tgt_box_id", "target")} + %elif from_sep_smaller_crit == "precise_l2": + ${load_true_box_extent("tgt", "tgt_box_id", "target")} + + coord_t tgt_l_2_true_radius; + { + coord_t tgt_l_2_true_radius_squared = 0; + + %for iaxis in range(dimensions): + tgt_l_2_true_radius_squared += square(fmax( + %for bound, trailing_comma in [("min", ","), ("max", "")]: + fabs( + tgt_center.s${iaxis} + - box_target_bounding_box_${bound}[ + ${iaxis} * aligned_nboxes + tgt_box_id]) + ${trailing_comma} + %endfor + )); + %endfor + + tgt_l_2_true_radius = sqrt(tgt_l_2_true_radius_squared); + } + %endif %endif @@ -758,14 +763,14 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) { coord_t source_rad = LEVEL_TO_RAD(walk_level); - // l-infty distance between source box and target box. + // l^infty distance between source box and target box. // Negative indicates overlap. coord_t l_inf_dist = 0; %for i in range(dimensions): l_inf_dist = fmax( l_inf_dist, fabs(tgt_center.s${i} - walk_center.s${i}) - - tgt_stickout_rad + - tgt_stickout_l_inf_rad - source_rad); %endfor @@ -777,7 +782,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) { coord_t source_rad = LEVEL_TO_RAD(walk_level); - // l-infty distance between source box and target box. + // l^infty distance between source box and target box. // Negative indicates overlap. coord_t l_inf_dist = 0; %for i in range(dimensions): @@ -792,6 +797,61 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) (2 - 8 * COORD_T_MACH_EPS) * source_rad; } + %elif from_sep_smaller_crit == "static_l2": + { + coord_t source_l_inf_rad = LEVEL_TO_RAD(walk_level); + + // l^2 distance between source box and target centers. + coord_t l_2_squared_center_dist = + 0 + %for i in range(dimensions): + + square(tgt_center.s${i} - walk_center.s${i}) + %endfor + ; + + // l^2 distance between source box and target box. + // Negative indicates overlap. + coord_t l_2_box_dist = + sqrt(l_2_squared_center_dist) + - sqrt((coord_t) (${dimensions})) + * tgt_stickout_l_inf_rad + - source_l_inf_rad; + + meets_sep_crit = l_2_box_dist >= + (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; + } + + %elif from_sep_smaller_crit in ["precise_l2", "static_l2"]: + { + coord_t source_l_inf_rad = LEVEL_TO_RAD(walk_level); + + // l^2 distance between source box and target centers. + coord_t l_2_squared_center_dist = + 0 + %for i in range(dimensions): + + square(tgt_center.s${i} - walk_center.s${i}); + %endfor + ; + + // l^2 distance between source box and target box. + // Negative indicates overlap. + coord_t l_2_box_dist = + sqrt(l_2_squared_center_dist) + - sqrt((coord_t) (${dimensions})) + %if from_sep_smaller_crit == "static_l2": + * tgt_stickout_l_inf_rad + %elif from_sep_smaller_crit == "precise_l2": + * tgt_l_2_true_radius + %else: + <% raise ValueError( + "from_sep_smaller_crit") %> + %endif + - source_l_inf_rad; + + meets_sep_crit = l_2_box_dist >= + (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; + } + %else: <% raise ValueError( "unknown value of from_sep_smaller_crit: %s" diff --git a/test/test_fmm.py b/test/test_fmm.py index d3b61cc..ef0b23f 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -244,9 +244,8 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( ("linf", "static_linf"), ("linf", "precise_linf"), ("l2", "precise_linf"), - # NYI - #("l2", "static_l2"), - #("l2", "precise_l2"), + ("l2", "static_l2"), + ("l2", "precise_l2"), ]) @pytest.mark.parametrize(("dims", "nsources_req", "ntargets_req", "who_has_extent", "source_gen", "target_gen", "filter_kind"), -- GitLab From f455d99837c07fb257cedee66feffe8f61a7db49 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 2 Oct 2017 18:22:59 -0500 Subject: [PATCH 11/18] Cut down the number of test cases in test_fmm --- test/test_fmm.py | 56 ++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index ef0b23f..8485107 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -240,43 +240,29 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( @pytest.mark.parametrize("well_sep_is_n_away", [1, 2]) -@pytest.mark.parametrize(("extent_norm", "from_sep_smaller_crit"), [ - ("linf", "static_linf"), - ("linf", "precise_linf"), - ("l2", "precise_linf"), - ("l2", "static_l2"), - ("l2", "precise_l2"), - ]) @pytest.mark.parametrize(("dims", "nsources_req", "ntargets_req", - "who_has_extent", "source_gen", "target_gen", "filter_kind"), + "who_has_extent", "source_gen", "target_gen", "filter_kind", + "extent_norm", "from_sep_smaller_crit"), [ - (2, 10**5, None, "", p_normal, p_normal, None), - (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None), - #(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, None), - #(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, None), - (2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None), - #(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, None), - - (3, 10**5, None, "", p_normal, p_normal, None), - (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None), - #(3, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, None), - #(3, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, None), - (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None), - #(3, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, None), - - (2, 10**5, None, "", p_normal, p_normal, "user"), - (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "user"), - #(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "user"), - #(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "user"), - (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user"), - #(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "user"), - - (2, 10**5, None, "", p_normal, p_normal, "tree"), - (3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "tree"), - #(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "tree"), - #(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "tree"), - (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree"), - #(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "tree"), + (2, 10**5, None, "", p_normal, p_normal, None, "linf", "static_linf"), + (2, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501 + (2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501 + + (3, 10**5, None, "", p_normal, p_normal, None, "linf", "static_linf"), + (3, 5 * 10**5, 4*10**4, "", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501 + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501 + + (2, 10**5, None, "", p_normal, p_normal, "user", "linf", "static_linf"), + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user", "linf", "static_linf"), # noqa: E501 + (2, 10**5, None, "", p_normal, p_normal, "tree", "linf", "static_linf"), + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree", "linf", "static_linf"), # noqa: E501 + + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501 + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "precise_linf"), # noqa: E501 + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "precise_linf"), # noqa: E501 + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "static_l2"), # noqa: E501 + (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "precise_l2"), # noqa: E501 + ]) def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away, -- GitLab From c6116868a077d20ab864917c23768fc937acca5d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 2 Oct 2017 23:17:13 -0500 Subject: [PATCH 12/18] Remove code for precise_l2 L3 far criterion --- boxtree/traversal.py | 35 +++-------------------------------- test/test_fmm.py | 1 - 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index f6b3239..fda07ca 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -650,28 +650,6 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) %elif from_sep_smaller_crit == "precise_linf": ${load_true_box_extent("tgt", "tgt_box_id", "target")} - %elif from_sep_smaller_crit == "precise_l2": - ${load_true_box_extent("tgt", "tgt_box_id", "target")} - - coord_t tgt_l_2_true_radius; - { - coord_t tgt_l_2_true_radius_squared = 0; - - %for iaxis in range(dimensions): - tgt_l_2_true_radius_squared += square(fmax( - %for bound, trailing_comma in [("min", ","), ("max", "")]: - fabs( - tgt_center.s${iaxis} - - box_target_bounding_box_${bound}[ - ${iaxis} * aligned_nboxes + tgt_box_id]) - ${trailing_comma} - %endfor - )); - %endfor - - tgt_l_2_true_radius = sqrt(tgt_l_2_true_radius_squared); - } - %endif %endif @@ -821,7 +799,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; } - %elif from_sep_smaller_crit in ["precise_l2", "static_l2"]: + %elif from_sep_smaller_crit in == "static_l2": { coord_t source_l_inf_rad = LEVEL_TO_RAD(walk_level); @@ -838,14 +816,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) coord_t l_2_box_dist = sqrt(l_2_squared_center_dist) - sqrt((coord_t) (${dimensions})) - %if from_sep_smaller_crit == "static_l2": - * tgt_stickout_l_inf_rad - %elif from_sep_smaller_crit == "precise_l2": - * tgt_l_2_true_radius - %else: - <% raise ValueError( - "from_sep_smaller_crit") %> - %endif + * tgt_stickout_l_inf_rad - source_l_inf_rad; meets_sep_crit = l_2_box_dist >= @@ -1613,7 +1584,7 @@ class FMMTraversalBuilder: if from_sep_smaller_crit not in [ "static_linf", "precise_linf", - "static_l2", "precise_l2", + "static_l2", ]: raise ValueError("unexpected value of 'from_sep_smaller_crit': %s" % from_sep_smaller_crit) diff --git a/test/test_fmm.py b/test/test_fmm.py index 8485107..cb5e502 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -261,7 +261,6 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "precise_linf"), # noqa: E501 (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "precise_linf"), # noqa: E501 (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "static_l2"), # noqa: E501 - (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "precise_l2"), # noqa: E501 ]) def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, -- GitLab From f4056a14560b071e5011fe4b92533fa16a279779 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 2 Oct 2017 23:33:38 -0500 Subject: [PATCH 13/18] Fix: Remove code for precise_l2 L3 far criterion --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index fda07ca..0ae58e5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -799,7 +799,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; } - %elif from_sep_smaller_crit in == "static_l2": + %elif from_sep_smaller_crit == "static_l2": { coord_t source_l_inf_rad = LEVEL_TO_RAD(walk_level); -- GitLab From 53ec62c24ffcd3bfac5ced9e1ed191c751c1ffea Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 4 Oct 2017 19:42:49 -0500 Subject: [PATCH 14/18] Create a list 3 balancing knob: from_sep_smaller_min_nsources_cumul --- boxtree/traversal.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 0ae58e5..639dcc9 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -833,7 +833,9 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) // box, but our stick-out regions might still have a // non-empty intersection. - if (meets_sep_crit) + if (meets_sep_crit + && box_source_counts_cumul[walk_box_id] + >= from_sep_smaller_min_nsources_cumul) { if (from_sep_smaller_source_level == walk_level) APPEND_from_sep_smaller(walk_box_id); @@ -1708,6 +1710,9 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_lists"), VectorArg(coord_dtype, "box_target_bounding_box_min"), VectorArg(coord_dtype, "box_target_bounding_box_max"), + VectorArg(particle_id_dtype, "box_source_counts_cumul"), + ScalarArg(particle_id_dtype, + "from_sep_smaller_min_nsources_cumul"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], ["from_sep_close_smaller"] @@ -1752,7 +1757,8 @@ class FMMTraversalBuilder: # {{{ driver - def __call__(self, queue, tree, wait_for=None, debug=False): + def __call__(self, queue, tree, wait_for=None, debug=False, + _from_sep_smaller_min_nsources_cumul=None): """ :arg queue: A :class:`pyopencl.CommandQueue` instance. :arg tree: A :class:`boxtree.Tree` instance. @@ -1764,6 +1770,10 @@ class FMMTraversalBuilder: for dependency management. """ + if _from_sep_smaller_min_nsources_cumul is None: + # default to old no-threshold behavior + _from_sep_smaller_min_nsources_cumul = 0 + if not tree._is_pruned: raise ValueError("tree must be pruned for traversal generation") @@ -2008,6 +2018,8 @@ class FMMTraversalBuilder: same_level_non_well_sep_boxes.lists.data, box_target_bounding_box_min.data, box_target_bounding_box_max.data, + tree.box_source_counts_cumul.data, + _from_sep_smaller_min_nsources_cumul, ) from_sep_smaller_wait_for = [] -- GitLab From 1a8ae829694dabb68266d8c59aa66c7ce075d72f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 5 Oct 2017 11:13:28 -0500 Subject: [PATCH 15/18] Default to precise_linf list 3 criterion if not given --- boxtree/traversal.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 639dcc9..999e71c 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1553,21 +1553,14 @@ class FMMTraversalBuilder: from_sep_smaller_crit = self.from_sep_smaller_crit - # FIXME Adjust these defaults once it's known what they should be. + if from_sep_smaller_crit is None: + from_sep_smaller_crit = "precise_linf" + if extent_norm == "linf": - if from_sep_smaller_crit is None: - # same as before - from_sep_smaller_crit = "static_linf" + # no special checks needed + pass elif extent_norm == "l2": - if from_sep_smaller_crit is None: - raise NotImplementedError( - "'from_sep_smaller_crit' was not specified. " - "Not specifying it should substitute in a " - "reasonable default. " - "Unfortunately, a reasonable default is not yet " - "known in the l^2 extent norms case.") - if from_sep_smaller_crit == "static_linf": raise ValueError( "The static l^inf from-sep-smaller criterion " -- GitLab From cbba374f2fd9a6751ba594d8a3de70b5e872b565 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 6 Oct 2017 17:07:43 -0500 Subject: [PATCH 16/18] Fix doc typo [ci skip] --- boxtree/tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/tree.py b/boxtree/tree.py index a22a102..ae8e8f0 100644 --- a/boxtree/tree.py +++ b/boxtree/tree.py @@ -114,7 +114,7 @@ class Tree(DeviceDataRecord): One of ``None``, ``"l2"`` or ``"linf"``. If *None*, particles do not have extent. If not *None*, indicates the norm with which extent-bearing particles - are determined to lie 'inside' a box's, taking into account the box's + are determined to lie 'inside' a box, taking into account the box's :attr:`stick_out_factor`. This image illustrates the difference in semantics: -- GitLab From d00fc8e715f2684972afec3957cfa806741a9287 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 7 Oct 2017 00:09:10 -0500 Subject: [PATCH 17/18] Fixes in response to @mattwala's review --- boxtree/traversal.py | 43 +++++++++++++++++-------------------------- boxtree/tree.py | 16 +++++++++++----- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 999e71c..5ebbd4c 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -649,6 +649,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) %elif from_sep_smaller_crit == "precise_linf": ${load_true_box_extent("tgt", "tgt_box_id", "target")} + // defines tgt_ext_center, tgt_radii_vec %endif %endif @@ -799,30 +800,6 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; } - %elif from_sep_smaller_crit == "static_l2": - { - coord_t source_l_inf_rad = LEVEL_TO_RAD(walk_level); - - // l^2 distance between source box and target centers. - coord_t l_2_squared_center_dist = - 0 - %for i in range(dimensions): - + square(tgt_center.s${i} - walk_center.s${i}); - %endfor - ; - - // l^2 distance between source box and target box. - // Negative indicates overlap. - coord_t l_2_box_dist = - sqrt(l_2_squared_center_dist) - - sqrt((coord_t) (${dimensions})) - * tgt_stickout_l_inf_rad - - source_l_inf_rad; - - meets_sep_crit = l_2_box_dist >= - (2 - 8 * COORD_T_MACH_EPS) * source_l_inf_rad; - } - %else: <% raise ValueError( "unknown value of from_sep_smaller_crit: %s" @@ -1220,8 +1197,7 @@ class FMMTraversalInfo(DeviceDataRecord): .. ------------------------------------------------------------------------ The attributes in this section are only available if the respective - particle type (source/target) has extents and if :attr:`Tree.extent_norm` - is ``"linf"``. + particle type (source/target) has extents. If they are not available, the corresponding attributes will be *None*. @@ -1537,6 +1513,18 @@ class _KernelInfo(Record): class FMMTraversalBuilder: def __init__(self, context, well_sep_is_n_away=1, from_sep_smaller_crit=None): + """ + :arg well_sep_is_n_away: Either An integer 1 or greater. (Only 2 is tested) + The spacing between boxes that is considered "well-separated" for + :attr:`from_sep_siblings` (List 2). + :arg from_sep_smaller_crit: The criterion used to determine separation + box dimensions and separation for :attr:`from_sep_smaller_by_level` + (List 3). May be one of ``"static_linf"`` (use the box square, + possibly enlarged by :attr:`Tree.stick_out_factor`), ``"precise_linf"` + (use the precise extent of targets in the box, including their radii), + or ``"static_l2"`` (use the circumcircle of the box, + possibly enlarged by :attr:`Tree.stick_out_factor`). + """ self.context = context self.well_sep_is_n_away = well_sep_is_n_away self.from_sep_smaller_crit = from_sep_smaller_crit @@ -1562,6 +1550,9 @@ class FMMTraversalBuilder: elif extent_norm == "l2": if from_sep_smaller_crit == "static_linf": + # Not technically necessary, but static linf will assume box + # bounds that are not guaranteed to contain all particle + # extetns. raise ValueError( "The static l^inf from-sep-smaller criterion " "cannot be used with the l^2 extent norm") diff --git a/boxtree/tree.py b/boxtree/tree.py index ae8e8f0..518d61a 100644 --- a/boxtree/tree.py +++ b/boxtree/tree.py @@ -106,9 +106,14 @@ class Tree(DeviceDataRecord): .. attribute:: stick_out_factor - The fraction of the (:math:`l^\infty`) box radius by which the - :math:`l^\infty` circles given by :attr:`source_radii` may stick out - the box in which they are contained. A scalar. + A scalar used for calculating how much particles with extent may + overextend their containing box. + + Each box in the tree can be thought of as being surrounded by a + fictitious box whose :math:`l^\infty` radius is `1 + stick_out_factor` + larger. Particles with extent are allowed to extend inside (a) the + ficitious box or (b) a disk surrounding the fictious box, depending on + :attr:`extent_norm`. .. attribute:: extent_norm @@ -121,8 +126,9 @@ class Tree(DeviceDataRecord): .. image:: images/linf-l2.png - In the figure, the box has radius :math:`R`, the particle has radius - :math:`r`, and :attr:`stick_out_factor` is denoted :math:`\alpha`. + In the figure, the box has (:math:`\ell^\infty`) radius :math:`R`, the + particle has radius :math:`r`, and :attr:`stick_out_factor` is denoted + :math:`\alpha`. .. attribute:: nsources -- GitLab From 3aea375c87a2d0c4994783e1864dbc5db55750ad Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 7 Oct 2017 12:28:30 -0500 Subject: [PATCH 18/18] Fix comment typo (thanks @mattwala) [ci skip] --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 5ebbd4c..a35c60d 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1552,7 +1552,7 @@ class FMMTraversalBuilder: if from_sep_smaller_crit == "static_linf": # Not technically necessary, but static linf will assume box # bounds that are not guaranteed to contain all particle - # extetns. + # extents. raise ValueError( "The static l^inf from-sep-smaller criterion " "cannot be used with the l^2 extent norm") -- GitLab