Skip to content
Snippets Groups Projects
dump-performance.py 1.3 KiB
Newer Older
  • Learn to ignore specific revisions
  • import pyopencl as cl
    import pyopencl.characterize.performance as perf
    
    
    def main():
        """Print a set of OpenCL performance figures for one context.

        Creates a context via ``cl.create_some_context()`` and prints, in order:
        command latency, profiling overhead (absolute and as a percentage of
        the latency), empty-kernel time, the float32 add rate, and -- for each
        of the host-to-device, device-to-host and device-to-device transfer
        types -- the transfer latency followed by the bandwidth at block sizes
        from 2**6 to 2**30 bytes (every other power of two).

        A bandwidth measurement that raises is reported by exception class
        name on its output line instead of aborting the remaining sizes.
        """
        ctx = cl.create_some_context()

        prof_overhead, latency = perf.get_profiling_overhead(ctx)

        print("command latency: %g s" % latency)
        print("profiling overhead: {:g} s -> {:.1f} %".format(
                prof_overhead, 100*prof_overhead/latency))

        # The measurements below are made on a queue created with profiling
        # enabled.
        queue = cl.CommandQueue(
                ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

        print("empty kernel: %g s" % perf.get_empty_kernel_time(queue))
        print("float32 add: %g GOps/s" % (perf.get_add_rate(queue)/1e9))

        for tx_type in [
                perf.HostToDeviceTransfer,
                perf.DeviceToHostTransfer,
                perf.DeviceToDeviceTransfer]:

            print("----------------------------------------")
            print(tx_type.__name__)
            print("----------------------------------------")

            print("latency: %g s" % perf.transfer_latency(queue, tx_type))

            for i in range(6, 31, 2):
                bs = 1 << i

                # Best effort: keep going with the remaining sizes if a single
                # measurement fails, and report which exception occurred.
                try:
                    result = "%g GB/s" % (
                            perf.transfer_bandwidth(queue, tx_type, bs)/1e9)
                except Exception as e:
                    result = "exception: %s" % e.__class__.__name__

                print("bandwidth @ %d bytes: %s" % (bs, result))
    
    # Run the performance dump only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()