diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fcfd149160782fb144e2631134ccc1d25ba74017..6e7cbc519368f8d9ff68aaed01a5aaeab30be3e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,23 @@ jobs: curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh . ./build-and-test-py-project.sh + examples: + name: Examples Py3 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: "Main Script" + run: | + EXTRA_INSTALL="numpy pymbolic" + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh + . ./ci-support.sh + build_py_project_in_venv + run_examples + docs: name: Documentation runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index f9730d3088c56834c00552ceb706ad2ad705a8d4..478e0bff9fb7964ce23e2fa1799cc40424d71487 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ distribute*tar.gz .cache .mypy_cache + +*.dat diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9b9500c0256879e8a49969ff9fa351e66eba2d08..82757f6676ad244edeb609d1db01d693c22fd143 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -Python 3: +Pytest: script: - py_version=3 - export EXTRA_INSTALL="numpy" @@ -12,6 +12,18 @@ Python 3: reports: junit: test/pytest.xml +Examples: + script: | + EXTRA_INSTALL="numpy pymbolic" + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/ci-support.sh + . 
./ci-support.sh + build_py_project_in_venv + run_examples + tags: + - python3 + except: + - tags + Flake8: script: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh diff --git a/doc/conf.py b/doc/conf.py index bf3d374a87bd8d4ca4967c1ef414ffebe9bba98a..dea64997c5c73f553104c1f7d214f6181d209f87 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -36,6 +36,7 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode'] +autoclass_content = "class" # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -176,8 +177,7 @@ texinfo_documents = [ intersphinx_mapping = { - 'http://docs.python.org/dev': None, - 'http://docs.scipy.org/doc/numpy/': None, - } - - + "https://docs.python.org/3": None, + "https://numpy.org/doc/stable": None, + "https://documen.tician.de/pymbolic/": None, + } diff --git a/doc/index.rst b/doc/index.rst index 24e1d66373b8112d03b8ae9bc8ff343e50622ce8..adeecf8ab771f3ff5fa8a705afaddbfaee8d87f4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -10,6 +10,7 @@ Welcome to pytools's documentation! persistent_dict graph codegen + log misc Indices and tables diff --git a/doc/log.rst b/doc/log.rst new file mode 100644 index 0000000000000000000000000000000000000000..dabef32866ee11a41972ed737eb2a1d26e7f4241 --- /dev/null +++ b/doc/log.rst @@ -0,0 +1,109 @@ +Time series logging +=================== + +How do I use this? +------------------ + +Consider this example of a 'simulation' logging various quantities: + +.. literalinclude:: ../examples/log.py + +(You can find this example as +:download:`examples/log.py <../examples/log.py>` in the pytools +source distribution.) + +Running this script will give you a file ``mylog.dat``. Note how log quantities +given as "watches" are also periodically logged to the console. 
+
+To analyze this data, we need some tools that are distributed as part of the
+`'datapyle' package on Github <https://github.com/inducer/datapyle>`__, which
+we need to install first::
+
+    $ pip install git+https://github.com/inducer/datapyle.git
+
+To prepare this data set for analysis, run ``runalyzer-gather`` as a script::
+
+    $ runalyzer-gather summary.dat mylog.dat
+    Scanning... [########################################] ETA ?
+    Importing... [########################################] ETA ?
+
+This step may look unnecessary here, but it serves two important functions:
+
+- If you would like to compare data from multiple runs, you can simply
+  add data files from multiple runs and analyze them side-by-side.
+- For distributed-memory runs, you can likewise give data sets from
+  multiple ranks to be gathered in a single file for centralized analysis.
+- As well as, naturally, any combination of the above.
+
+After this, you may use ``runalyzer`` (also in ``datapyle``) to analyze
+the summary file::
+
+    # -m enables '$quantity' in SQL
+    $ runalyzer -m summary.dat
+    Runalyzer running on Python 3.8.5 (default, Aug 2 2020, 15:09:07)
+    [GCC 10.2.0]
+    Copyright (c) Andreas Kloeckner 2008
+    Run .help to see help for 'magic' commands
+    >>>
+
+This is a normal Python console, but it has a few extra tricks up its
+sleeve. ``.help`` gives an overview. Here, we'll focus on demonstrating
+two interesting features. First, let's get an overview of all the
+logged quantities::
+
+    >>> .quantities
+    id | name    | unit | description                 | rank_aggregator
+    ---+---------+------+-----------------------------+-----------------
+     7 | dt      | s    | Simulation Timestep         | None
+     9 | fifteen | None | None                        | None
+     5 | step    | 1    | Timesteps                   | None
+     2 | t_2step | s    | Step-to-step duration       | None
+     3 | t_cpu   | s    | Wall time                   | None
+     4 | t_log   | s    | Time spent updating the log | None
+     6 | t_sim   | s    | Simulation Time             | None
+     1 | t_step  | s    | Time step duration          | None
+     8 | t_vis   | s    | Time spent visualizing      | None
+
+Next, we could plot, e.g. time step duration against simulation time::
+
+    >>> .plot select $t_sim, $t_2step
+
+Next, a fact that the ``select`` may have given away: The argument to
+``.plot`` is a SQL query that is passed on to :mod:`sqlite3`, with support
+for some additional syntax. You can see what this query expanded to internally::
+
+    >>> mangle_sql("select $t_sim, $t_2step")
+    'select t_sim.value, t_2step.value from runs inner join t_sim on (t_sim.run_id = runs.id) inner join t_2step on (t_2step.run_id = runs.id and t_sim.step = t_2step.step and t_sim.rank=t_2step.rank) '
+
+As you can see, by default these queries return data from all runs.
+You can filter which runs you are interested in through run properties::
+
+    >>> .runprops
+    cmdline
+    date
+    dirname
+    filename
+    id
+    is_parallel
+    machine
+    myconst
+    rank_count
+    schema_version
+    unique_run_id
+    unixtime
+
+simply by adding a SQL ``where`` clause referring to these properties.
+
+As you can see from the demo, you can of course add your own,
+application-focused run properties.
+
+This means that you have the full power of SQL available to analyze
+your time series data, for example:
+
+- Plot the solution 2-norms of all runs that were run on a Tuesday between time
+  step 2000 and 3000
+- Make a scatterplot correlating memory transfer amounts and simulation time
+
+.. automodule:: pytools.log
+
+.. 
no-doctest diff --git a/examples/log.py b/examples/log.py new file mode 100644 index 0000000000000000000000000000000000000000..4eff36708dc3ca82f6dfd8cde6a50d0120c35f3b --- /dev/null +++ b/examples/log.py @@ -0,0 +1,52 @@ +from time import sleep +from random import uniform +from pytools.log import (LogManager, add_general_quantities, + add_simulation_quantities, add_run_info, IntervalTimer, + LogQuantity, set_dt) + +from warnings import warn + + +class Fifteen(LogQuantity): + def __call__(self): + return 15 + + +def main(): + logmgr = LogManager("mylog.dat", "w") # , comm=... + + # set a run property + logmgr.set_constant("myconst", uniform(0, 1)) + + add_run_info(logmgr) + add_general_quantities(logmgr) + add_simulation_quantities(logmgr) + + vis_timer = IntervalTimer("t_vis", "Time spent visualizing") + logmgr.add_quantity(vis_timer) + logmgr.add_quantity(Fifteen("fifteen")) + logmgr.add_watches(["step.max", "t_sim.max", "t_step.max"]) + + for istep in range(200): + logmgr.tick_before() + + dt = uniform(0.01, 0.1) + set_dt(logmgr, dt) + sleep(dt) + + # Illustrate custom timers + if istep % 10 == 0: + with vis_timer.start_sub_timer(): + sleep(0.05) + + # Illustrate warnings capture + if uniform(0, 1) < 0.05: + warn("Oof. Something went awry.") + + logmgr.tick_after() + + logmgr.close() + + +if __name__ == "__main__": + main() diff --git a/pytools/log.py b/pytools/log.py index 104cc82dd2168f6cdc9faf71bd81cde134beafa1..f023b5085000f1061a976ed95c8545e7b524e750 100644 --- a/pytools/log.py +++ b/pytools/log.py @@ -1,6 +1,60 @@ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function +""" +Log Quantity Abstract Interfaces +-------------------------------- + +.. autoclass:: LogQuantity +.. autoclass:: PostLogQuantity +.. autoclass:: MultiLogQuantity +.. autoclass:: MultiPostLogQuantity + +Log Manager +----------- + +.. autoclass:: LogManager +.. 
autofunction:: add_run_info + +Built-in Log General-Purpose Quantities +--------------------------------------- +.. autoclass:: IntervalTimer +.. autoclass:: LogUpdateDuration +.. autoclass:: EventCounter +.. autoclass:: TimestepCounter +.. autoclass:: StepToStepDuration +.. autoclass:: TimestepDuration +.. autoclass:: CPUTime +.. autoclass:: ETA +.. autofunction:: add_general_quantities + +Built-in Log Simulation-Related Quantities +------------------------------------------ +.. autoclass:: SimulationTime +.. autoclass:: Timestep +.. autofunction:: set_dt +.. autofunction:: add_simulation_quantities +""" + +__copyright__ = "Copyright (C) 2009-2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + import logging import six @@ -30,7 +84,12 @@ def time(): # {{{ abstract logging interface class LogQuantity(object): - """A source of loggable scalars.""" + """A source of loggable scalars. + + .. automethod:: __init__ + .. automethod:: tick + .. 
automethod:: __call__ + """ sort_weight = 0 @@ -57,8 +116,12 @@ class LogQuantity(object): class PostLogQuantity(LogQuantity): - """A source of loggable scalars.""" + """A source of loggable scalars. + .. automethod:: __init__ + .. automethod:: tick + .. automethod:: prepare_for_tick + """ sort_weight = 0 def prepare_for_tick(self): @@ -264,10 +327,10 @@ def _set_up_schema(db_conn): class LogManager(object): - """A parallel-capable diagnostic time-series logging facility. - It is meant to log data from a computation, with certain log - quantities available before a cycle, and certain other ones - afterwards. A timeline of invocations looks as follows:: + """A distributed-memory-capable diagnostic time-series logging facility. + It is meant to log data from a computation, with certain log quantities + available before a cycle, and certain other ones afterwards. A timeline of + invocations looks as follows:: tick_before() compute... @@ -294,8 +357,32 @@ class LogManager(object): If MPI parallelism is used, the "head rank" below always refers to rank 0. - Command line tools called :command:`runalyzer` and :command:`logtool` - (deprecated) are available for looking at the data in a saved log. + Command line tools called :command:`runalyzer` (in the `datapyle package + `__) are available for looking at the + data in a saved log. + + .. automethod:: __init__ + .. automethod:: save + .. automethod:: close + + .. rubric:: Data retrieval + + .. automethod:: get_table + .. automethod:: get_warnings + .. automethod:: get_expr_dataset + .. automethod:: get_joint_dataset + + .. rubric:: Configuration + + .. automethod:: capture_warnings + .. automethod:: add_watches + .. automethod:: set_constant + .. automethod:: add_quantity + + .. rubric:: Time Loop + + .. automethod:: tick_before + .. 
automethod:: tick_after """ def __init__(self, filename=None, mode="r", mpi_comm=None, capture_warnings=True, @@ -307,7 +394,7 @@ class LogManager(object): :param mode: One of "w", "r" for write, read. "w" assumes that the database is initially empty. May also be "wu" to indicate that a unique filename should be chosen automatically. - :arg mpi_comm: A :mod:`mpi4py.MPI.Comm`. If given, logs are + :arg mpi_comm: A `mpi4py.MPI.Comm`. If given, logs are periodically synchronized to the head node, which then writes them out to disk. :param capture_warnings: Tap the Python warnings facility and save warnings @@ -693,12 +780,12 @@ class LogManager(object): def get_expr_dataset(self, expression, description=None, unit=None): """Prepare a time-series dataset for a given expression. - @arg expression: A C{pymbolic} expression that may involve + :arg expression: A :mod:`pymbolic` expression that may involve the time-series variables and the constants in this :class:`LogManager`. If there is data from multiple ranks for a quantity occuring in this expression, an aggregator may have to be specified. - @return: C{(description, unit, table)}, where C{table} - is a list of tuples C{(tick_nbr, value)}. + :returns: ``(description, unit, table)``, where *table* + is a list of tuples ``(tick_nbr, value)``. Aggregators are specified as follows: - C{qty.min}, C{qty.max}, C{qty.avg}, C{qty.sum}, C{qty.norm2} @@ -747,13 +834,13 @@ class LogManager(object): def get_joint_dataset(self, expressions): """Return a joint data set for a list of expressions. - @arg expressions: a list of either strings representing + :arg expressions: a list of either strings representing expressions directly, or triples (descr, unit, expr). In the former case, the description and the unit are found automatically, if possible. In the latter case, they are used as specified. - @return: A triple C{(descriptions, units, table)}, where - C{table} is a a list of C{[(tstep, (val_expr1, val_expr2,...)...]}. 
+ :returns: A triple ``(descriptions, units, table)``, where + *table* is a a list of C{[(tstep, (val_expr1, val_expr2,...)...]}. """ # dubs is a list of (desc, unit, table) triples as @@ -811,7 +898,7 @@ class LogManager(object): outf.close() def plot_matplotlib(self, expr_x, expr_y): - from pylab import xlabel, ylabel, plot + from matplotlib.pyplot import xlabel, ylabel, plot (data_x, descr_x, unit_x), (data_y, descr_y, unit_y) = \ self.get_plot_data(expr_x, expr_y) @@ -966,13 +1053,25 @@ class _SubTimer: del self.start_time return self + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + def submit(self): self.itimer.add_time(self.elapsed) del self.elapsed class IntervalTimer(PostLogQuantity): - """Records elapsed times.""" + """Records elapsed times supplied by the user either through + sub-timers, or by explicitly calling :meth:`add_time`. + + .. automethod:: __init__ + .. automethod:: start_sub_timer + .. automethod:: add_time + """ def __init__(self, name, description=None): LogQuantity.__init__(self, name, "s", description) @@ -992,7 +1091,10 @@ class IntervalTimer(PostLogQuantity): class LogUpdateDuration(LogQuantity): - """Records how long the last :meth:`LogManager.tick` invocation took.""" + """Records how long the last log update in :class:`LogManager` took. + + .. automethod:: __init__ + """ # FIXME this is off by one tick @@ -1005,7 +1107,12 @@ class LogUpdateDuration(LogQuantity): class EventCounter(PostLogQuantity): - """Counts events signaled by :meth:`add`.""" + """Counts events signaled by :meth:`add`. + + .. automethod:: __init__ + .. automethod:: add + .. 
automethod:: transfer + """ def __init__(self, name="interval", description=None): PostLogQuantity.__init__(self, name, "1", description) @@ -1039,7 +1146,7 @@ def time_and_count_function(f, timer, counter=None, increment=1): class TimestepCounter(LogQuantity): - """Counts the number of times :meth:`LogManager.tick` is called.""" + """Counts the number of times :class:`LogManager` ticks.""" def __init__(self, name="step"): LogQuantity.__init__(self, name, "1", "Timesteps") @@ -1055,6 +1162,8 @@ class StepToStepDuration(PostLogQuantity): """Records the CPU time between invocations of :meth:`LogManager.tick_before` and :meth:`LogManager.tick_after`. + + .. automethod:: __init__ """ def __init__(self, name="t_2step"): @@ -1077,6 +1186,8 @@ class TimestepDuration(PostLogQuantity): """Records the CPU time between the starts of time steps. :meth:`LogManager.tick_before` and :meth:`LogManager.tick_after`. + + .. automethod:: __init__ """ # We would like to run last, so that if log gathering takes any @@ -1098,7 +1209,10 @@ class TimestepDuration(PostLogQuantity): class CPUTime(LogQuantity): - """Records (monotonically increasing) CPU time.""" + """Records (monotonically increasing) CPU time. + + .. automethod:: __init__ + """ def __init__(self, name="t_cpu"): LogQuantity.__init__(self, name, "s", "Wall time") @@ -1109,7 +1223,10 @@ class CPUTime(LogQuantity): class ETA(LogQuantity): - """Records an estimate of how long the computation will still take.""" + """Records an estimate of how long the computation will still take. + + .. automethod:: __init__ + """ def __init__(self, total_steps, name="t_eta"): LogQuantity.__init__(self, name, "s", "Estimated remaining duration") @@ -1128,7 +1245,7 @@ class ETA(LogQuantity): def add_general_quantities(mgr): - """Add generally applicable :class:`LogQuantity` objects to C{mgr}.""" + """Add generally applicable :class:`LogQuantity` objects to *mgr*.""" mgr.add_quantity(TimestepDuration()) mgr.add_quantity(StepToStepDuration())