diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2f447df4f7c601672f037ffe3374d216d1ddd151..21ba2fbaec2231e264a39db1770d9aa8ffa79b4b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
         -   name: "Main Script"
             run: |
                 curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh
-                . ./prepare-and-run-flake8.sh "$(basename $GITHUB_REPOSITORY)" ./test
+                . ./prepare-and-run-flake8.sh "$(basename $GITHUB_REPOSITORY)" ./test examples
 
     pylint:
         name: Pylint
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dab7af8219adb414bbd9c2a07769da100ee60482..5fb7eccdf83826e581ef9957cbb4a1b192246b25 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -247,7 +247,7 @@ Documentation:
 Flake8:
   script:
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh
-  - . ./prepare-and-run-flake8.sh "$CI_PROJECT_NAME" test
+  - . ./prepare-and-run-flake8.sh "$CI_PROJECT_NAME" test examples
   tags:
   - python3
   except:
diff --git a/examples/black-hole-accretion.py b/examples/black-hole-accretion.py
old mode 100755
new mode 100644
index 693470b04290e5e251c9abb2694aa84b1641f915..e4cdbbbe612299e8d63aec2f46359f06cdc706ea
--- a/examples/black-hole-accretion.py
+++ b/examples/black-hole-accretion.py
@@ -28,80 +28,29 @@
 # - Pierre Lena for his passion about science and vulgarisation
 
 # If crash on OpenCL Intel implementation, add following options and force
-#export PYOPENCL_COMPILER_OUTPUT=1
-#export CL_CONFIG_USE_VECTORIZER=True
-#export CL_CONFIG_CPU_VECTORIZER_MODE=16
+# export PYOPENCL_COMPILER_OUTPUT=1
+# export CL_CONFIG_USE_VECTORIZER=True
+# export CL_CONFIG_CPU_VECTORIZER_MODE=16
 
 import pyopencl as cl
 import numpy
-import time,string
-from numpy.random import randint as nprnd
+import time
 import sys
 import getopt
 from socket import gethostname
 
+
 def DictionariesAPI():
-    PhysicsList={'Einstein':0,'Newton':1}
-    return(PhysicsList)
+    PhysicsList = {"Einstein": 0, "Newton": 1}
+    return PhysicsList
+
 
 #
 # Blank space below to simplify debugging on OpenCL code
 #
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-BlobOpenCL= """
+BlobOpenCL = """
 
 #define PI (float)3.14159265359e0f
 #define nbr 256
@@ -218,7 +167,7 @@ float spectre_cn(float rf32,float b32,float db32,
 
   MYFLOAT v=1.e0f-3.e0f/r;
 
-  qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f ));
+  qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f ));  // # noqa: E501
 
   temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu)));
 
@@ -340,7 +289,7 @@ __kernel void EachPixel(__global float *zImage,__global float *fImage,
      vp=vs;
      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
      rps=fabs(b/us);
-     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;       
+     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;
 
   } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS));
 
@@ -827,78 +776,8 @@ __kernel void Original(__global float *zImage,__global float *fImage,
 """
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 def KernelCodeCuda():
-    BlobCUDA= """
+    BlobCUDA = """
 
 #define PI (float)3.14159265359
 #define nbr 256
@@ -1019,7 +898,7 @@ __device__ float spectre_cn(float rf32,float b32,float db32,
 
   MYFLOAT v=1.-3./r;
 
-  qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 ));
+  qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 ));  // # noqa: #051
 
   temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu)));
 
@@ -1145,7 +1024,7 @@ __global__ void EachPixel(float *zImage,float *fImage,
      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
      rpp=rps;
      rps=fabs(b/us);
-     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;          
+     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;
 
   } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0));
 
@@ -1612,7 +1491,8 @@ __global__ void Original(float *zImage,float *fImage,
 
 }
 """
-    return(BlobCUDA)
+    return BlobCUDA
+
 
 # def ImageOutput(sigma,prefix,Colors):
 #     import matplotlib.pyplot as plt
@@ -1625,459 +1505,602 @@ __global__ void Original(float *zImage,float *fImage,
 #     print("Save image as %s.png file" % prefix)
 #     print("Save Time : %f" % save_time)
 
-def ImageOutput(sigma,prefix,Colors):
+
+def ImageOutput(sigma, prefix, Colors):
     from PIL import Image
-    Max=sigma.max()
-    Min=sigma.min()
+
+    Max = sigma.max()
+    Min = sigma.min()
     # Normalize value as 8bits Integer
-    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
+    SigmaInt = (255 * (sigma - Min) / (Max - Min)).astype("uint8")
     image = Image.fromarray(SigmaInt)
     image.save("%s.jpg" % prefix)
-    
-def BlackHoleCL(zImage,fImage,InputCL):
-
-    kernel_params = {}
-
-    Device=InputCL['Device']
-    GpuStyle=InputCL['GpuStyle']
-    VariableType=InputCL['VariableType']
-    Size=InputCL['Size']
-    Mass=InputCL['Mass']
-    InternalRadius=InputCL['InternalRadius']
-    ExternalRadius=InputCL['ExternalRadius']
-    Angle=InputCL['Angle']
-    Method=InputCL['Method']
-    TrackPoints=InputCL['TrackPoints']
-    Physics=InputCL['Physics']
-    NoImage=InputCL['NoImage']
-    TrackSave=InputCL['TrackSave']
-
-    PhysicsList=DictionariesAPI()
-    
-    if InputCL['BlackBody']:
+
+
+def BlackHoleCL(zImage, fImage, InputCL):
+    Device = InputCL["Device"]
+    Mass = InputCL["Mass"]
+    InternalRadius = InputCL["InternalRadius"]
+    ExternalRadius = InputCL["ExternalRadius"]
+    Angle = InputCL["Angle"]
+    Method = InputCL["Method"]
+    TrackPoints = InputCL["TrackPoints"]
+    Physics = InputCL["Physics"]
+    NoImage = InputCL["NoImage"]
+    TrackSave = InputCL["TrackSave"]
+
+    PhysicsList = DictionariesAPI()
+
+    if InputCL["BlackBody"]:
         # Spectrum is Black Body one
-        Line=0
+        Line = 0
     else:
         # Spectrum is Monochromatic Line one
-        Line=1
+        Line = 1
 
-    Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32)
-    IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32)
+    Trajectories = numpy.zeros(
+        (int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
+    )
+    IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)
 
     # Je detecte un peripherique GPU dans la liste des peripheriques
-    Id=0
-    HasXPU=False
+    Id = 0
+    HasXPU = False
     for platform in cl.get_platforms():
         for device in platform.get_devices():
-            if Id==Device:
-                PF4XPU=platform.name
-                XPU=device
-                print("CPU/GPU selected: ",device.name.lstrip())
-                HasXPU=True
-            Id+=1
-
-    if HasXPU==False:
-        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
+            if Id == Device:
+                PF4XPU = platform.name
+                XPU = device
+                print("CPU/GPU selected: ", device.name.lstrip())
+                HasXPU = True
+            Id += 1
+
+    if not HasXPU:
+        print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
         sys.exit()
 
     ctx = cl.Context([XPU])
-    queue = cl.CommandQueue(ctx,
-			    properties=cl.command_queue_properties.PROFILING_ENABLE)
-
-    BuildOptions="-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (PhysicsList[Physics],InputCL['TrackPoints'])
-
-    print('My Platform is ',PF4XPU)
-    
-    if 'Intel' in PF4XPU or 'Experimental' in PF4XPU or 'Clover' in PF4XPU or 'Portable' in PF4XPU :
-        print('No extra options for Intel and Clover!')
+    queue = cl.CommandQueue(
+        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
+    )
+
+    BuildOptions = "-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (
+        PhysicsList[Physics],
+        InputCL["TrackPoints"],
+    )
+
+    print("My Platform is ", PF4XPU)
+
+    if (
+        "Intel" in PF4XPU
+        or "Experimental" in PF4XPU
+        or "Clover" in PF4XPU
+        or "Portable" in PF4XPU
+    ):
+        print("No extra options for Intel and Clover!")
     else:
-        BuildOptions = BuildOptions+" -cl-mad-enable"
+        BuildOptions = BuildOptions + " -cl-mad-enable"
+
+    BlackHoleCL = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)
 
-    BlackHoleCL = cl.Program(ctx,BlobOpenCL).build(options = BuildOptions)
-    
     # Je recupere les flag possibles pour les buffers
     mf = cl.mem_flags
 
-    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
-        TrajectoriesCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories) 
+    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
+        TrajectoriesCL = cl.Buffer(
+            ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories
+        )
         IdLastCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=IdLast)
 
     zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage)
     fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage)
-    
-    start_time=time.time()
-
-    if Method=='EachPixel':
-        CLLaunch=BlackHoleCL.EachPixel(queue,(zImage.shape[0],zImage.shape[1]),
-                                       None,zImageCL,fImageCL,
-                                       numpy.float32(Mass),
-                                       numpy.float32(InternalRadius),
-                                       numpy.float32(ExternalRadius),
-                                       numpy.float32(Angle),
-                                       numpy.int32(Line))
+
+    start_time = time.time()
+
+    if Method == "EachPixel":
+        CLLaunch = BlackHoleCL.EachPixel(
+            queue,
+            (zImage.shape[0], zImage.shape[1]),
+            None,
+            zImageCL,
+            fImageCL,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
         CLLaunch.wait()
-    elif Method=='Original':
-        CLLaunch=BlackHoleCL.Original(queue,(1,),
-                                      None,zImageCL,fImageCL,
-                                      numpy.uint32(zImage.shape[0]/2),
-                                      numpy.float32(Mass),
-                                      numpy.float32(InternalRadius),
-                                      numpy.float32(ExternalRadius),
-                                      numpy.float32(Angle),
-                                      numpy.int32(Line))
+    elif Method == "Original":
+        CLLaunch = BlackHoleCL.Original(
+            queue,
+            (1,),
+            None,
+            zImageCL,
+            fImageCL,
+            numpy.uint32(zImage.shape[0] / 2),
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
         CLLaunch.wait()
-    elif Method=='EachCircle':
-        CLLaunch=BlackHoleCL.EachCircle(queue,(int(zImage.shape[0]/2),),
-                                        None,zImageCL,fImageCL,
-                                        numpy.float32(Mass),
-                                        numpy.float32(InternalRadius),
-                                        numpy.float32(ExternalRadius),
-                                        numpy.float32(Angle),
-                                        numpy.int32(Line))
+    elif Method == "EachCircle":
+        CLLaunch = BlackHoleCL.EachCircle(
+            queue,
+            (int(zImage.shape[0] / 2),),
+            None,
+            zImageCL,
+            fImageCL,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
         CLLaunch.wait()
-    elif Method=='TrajectoCircle':
-        CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],),
-                                        None,TrajectoriesCL,IdLastCL,
-                                        numpy.float32(Mass),
-                                        numpy.float32(InternalRadius),
-                                        numpy.float32(ExternalRadius),
-                                        numpy.float32(Angle),
-                                        numpy.int32(Line))
-        
-        CLLaunch=BlackHoleCL.Circle(queue,(Trajectories.shape[0],
-                                           int(zImage.shape[0]*4)),None,
-                                    TrajectoriesCL,IdLastCL,
-                                    zImageCL,fImageCL,
-                                    numpy.float32(Mass),
-                                    numpy.float32(InternalRadius),
-                                    numpy.float32(ExternalRadius),
-                                    numpy.float32(Angle),
-                                    numpy.int32(Line))
+    elif Method == "TrajectoCircle":
+        CLLaunch = BlackHoleCL.Trajectory(
+            queue,
+            (Trajectories.shape[0],),
+            None,
+            TrajectoriesCL,
+            IdLastCL,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
+
+        CLLaunch = BlackHoleCL.Circle(
+            queue,
+            (Trajectories.shape[0], int(zImage.shape[0] * 4)),
+            None,
+            TrajectoriesCL,
+            IdLastCL,
+            zImageCL,
+            fImageCL,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
         CLLaunch.wait()
     else:
-        CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],),
-                                        None,TrajectoriesCL,IdLastCL,
-                                        numpy.float32(Mass),
-                                        numpy.float32(InternalRadius),
-                                        numpy.float32(ExternalRadius),
-                                        numpy.float32(Angle),
-                                        numpy.int32(Line))
-        
-        CLLaunch=BlackHoleCL.Pixel(queue,(zImage.shape[0],zImage.shape[1]),None,
-                                   zImageCL,fImageCL,TrajectoriesCL,IdLastCL,
-                                   numpy.uint32(Trajectories.shape[0]),
-                                   numpy.float32(Mass),
-                                   numpy.float32(InternalRadius),
-                                   numpy.float32(ExternalRadius),
-                                   numpy.float32(Angle),
-                                   numpy.int32(Line))
+        CLLaunch = BlackHoleCL.Trajectory(
+            queue,
+            (Trajectories.shape[0],),
+            None,
+            TrajectoriesCL,
+            IdLastCL,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
+
+        CLLaunch = BlackHoleCL.Pixel(
+            queue,
+            (zImage.shape[0], zImage.shape[1]),
+            None,
+            zImageCL,
+            fImageCL,
+            TrajectoriesCL,
+            IdLastCL,
+            numpy.uint32(Trajectories.shape[0]),
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+        )
         CLLaunch.wait()
-    
-    compute = time.time()-start_time
-    
-    cl.enqueue_copy(queue,zImage,zImageCL).wait()
-    cl.enqueue_copy(queue,fImage,fImageCL).wait()
-    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
-        cl.enqueue_copy(queue,Trajectories,TrajectoriesCL).wait()
-        cl.enqueue_copy(queue,IdLast,IdLastCL).wait()
-    elapsed = time.time()-start_time
+
+    compute = time.time() - start_time
+
+    cl.enqueue_copy(queue, zImage, zImageCL).wait()
+    cl.enqueue_copy(queue, fImage, fImageCL).wait()
+    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
+        cl.enqueue_copy(queue, Trajectories, TrajectoriesCL).wait()
+        cl.enqueue_copy(queue, IdLast, IdLastCL).wait()
+    elapsed = time.time() - start_time
     print("\nCompute Time : %f" % compute)
     print("Elapsed Time : %f\n" % elapsed)
 
-    zMaxPosition=numpy.where(zImage[:,:]==zImage.max())
-    fMaxPosition=numpy.where(fImage[:,:]==fImage.max())
-    print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max())))
-    print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max())))
+    zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
+    fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
+    print(
+        "Z max @(%f,%f) : %f"
+        % (
+            (
+                1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
+                1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
+                zImage.max(),
+            )
+        )
+    )
+    print(
+        "Flux max @(%f,%f) : %f"
+        % (
+            (
+                1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
+                1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
+                fImage.max(),
+            )
+        )
+    )
     zImageCL.release()
     fImageCL.release()
 
-    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
+    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
         if not NoImage:
-            AngleStep=4*numpy.pi/TrackPoints
-            Angles=numpy.arange(0.,4*numpy.pi,AngleStep)
-            Angles.shape=(1,TrackPoints)
-            Hostname=gethostname()
-            Date=time.strftime("%Y%m%d_%H%M%S")
-            ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
-            
+            AngleStep = 4 * numpy.pi / TrackPoints
+            Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
+            Angles.shape = (1, TrackPoints)
+
             if TrackSave:
                 # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
                 #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
                 #               delimiter=' ', fmt='%.2e')
-                numpy.savetxt("TrouNoirTrajectories.csv",
-                            numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),delimiter=' ', fmt='%.2e')
-        
+                numpy.savetxt(
+                    "TrouNoirTrajectories.csv",
+                    numpy.transpose(numpy.concatenate((Angles, Trajectories),
+                        axis=0)),
+                    delimiter=" ",
+                    fmt="%.2e",
+                )
+
         TrajectoriesCL.release()
         IdLastCL.release()
-        
-    return(elapsed)
-
-def BlackHoleCUDA(zImage,fImage,InputCL):
-
-    kernel_params = {}
-
-    Device=InputCL['Device']
-    GpuStyle=InputCL['GpuStyle']
-    VariableType=InputCL['VariableType']
-    Size=InputCL['Size']
-    Mass=InputCL['Mass']
-    InternalRadius=InputCL['InternalRadius']
-    ExternalRadius=InputCL['ExternalRadius']
-    Angle=InputCL['Angle']
-    Method=InputCL['Method']
-    TrackPoints=InputCL['TrackPoints']
-    Physics=InputCL['Physics']
-    Threads=InputCL['Threads']
-
-    PhysicsList=DictionariesAPI()
-    
-    if InputCL['BlackBody']:
+
+    return elapsed
+
+
+def BlackHoleCUDA(zImage, fImage, InputCL):
+    Device = InputCL["Device"]
+    Mass = InputCL["Mass"]
+    InternalRadius = InputCL["InternalRadius"]
+    ExternalRadius = InputCL["ExternalRadius"]
+    Angle = InputCL["Angle"]
+    Method = InputCL["Method"]
+    TrackPoints = InputCL["TrackPoints"]
+    Physics = InputCL["Physics"]
+    Threads = InputCL["Threads"]
+
+    PhysicsList = DictionariesAPI()
+
+    if InputCL["BlackBody"]:
         # Spectrum is Black Body one
-        Line=0
+        Line = 0
     else:
         # Spectrum is Monochromatic Line one
-        Line=1
+        Line = 1
 
-    Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32)
-    IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32)
+    Trajectories = numpy.zeros(
+        (int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
+    )
+    IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)
 
     try:
         # For PyCUDA import
         import pycuda.driver as cuda
         from pycuda.compiler import SourceModule
-        
+
         cuda.init()
         for Id in range(cuda.Device.count()):
-            if Id==Device:
-                XPU=cuda.Device(Id)
+            if Id == Device:
+                XPU = cuda.Device(Id)
                 print("GPU selected %s" % XPU.name())
         print
 
     except ImportError:
         print("Platform does not seem to support CUDA")
 
-    Context=XPU.make_context()
+    Context = XPU.make_context()
 
     try:
-        mod = SourceModule(KernelCodeCuda(),options=['--compiler-options','-DPHYSICS=%i -DSETTRACKPOINTS=%i' % (PhysicsList[Physics],TrackPoints)])
+        mod = SourceModule(
+            KernelCodeCuda(),
+            options=[
+                "--compiler-options",
+                "-DPHYSICS=%i -DSETTRACKPOINTS=%i"
+                % (PhysicsList[Physics], TrackPoints),
+            ],
+        )
         print("Compilation seems to be OK")
-    except:
+    except Exception:
         print("Compilation seems to break")
 
-    EachPixelCU=mod.get_function("EachPixel")
-    OriginalCU=mod.get_function("Original")
-    EachCircleCU=mod.get_function("EachCircle")
-    TrajectoryCU=mod.get_function("Trajectory")
-    PixelCU=mod.get_function("Pixel")
-    CircleCU=mod.get_function("Circle")
-    
-    TrajectoriesCU = cuda.mem_alloc(Trajectories.size*Trajectories.dtype.itemsize)
+    EachPixelCU = mod.get_function("EachPixel")
+    OriginalCU = mod.get_function("Original")
+    EachCircleCU = mod.get_function("EachCircle")
+    TrajectoryCU = mod.get_function("Trajectory")
+    PixelCU = mod.get_function("Pixel")
+    CircleCU = mod.get_function("Circle")
+
+    TrajectoriesCU = cuda.mem_alloc(Trajectories.size * Trajectories.dtype.itemsize)
     cuda.memcpy_htod(TrajectoriesCU, Trajectories)
-    zImageCU = cuda.mem_alloc(zImage.size*zImage.dtype.itemsize)
+    zImageCU = cuda.mem_alloc(zImage.size * zImage.dtype.itemsize)
     cuda.memcpy_htod(zImageCU, zImage)
-    fImageCU = cuda.mem_alloc(fImage.size*fImage.dtype.itemsize)
+    fImageCU = cuda.mem_alloc(fImage.size * fImage.dtype.itemsize)
     cuda.memcpy_htod(zImageCU, fImage)
-    IdLastCU = cuda.mem_alloc(IdLast.size*IdLast.dtype.itemsize)
+    IdLastCU = cuda.mem_alloc(IdLast.size * IdLast.dtype.itemsize)
     cuda.memcpy_htod(IdLastCU, IdLast)
 
-    start_time=time.time()
-
-    if Method=='EachPixel':
-        EachPixelCU(zImageCU,fImageCU,
-                    numpy.float32(Mass),
-                    numpy.float32(InternalRadius),
-                    numpy.float32(ExternalRadius),
-                    numpy.float32(Angle),
-                    numpy.int32(Line),
-                    grid=(int(zImage.shape[0]/Threads),
-                          int(zImage.shape[1]/Threads)),
-                    block=(Threads,Threads,1))
-    elif Method=='EachCircle':
-        EachCircleCU(zImageCU,fImageCU,
-                   numpy.float32(Mass),
-                   numpy.float32(InternalRadius),
-                   numpy.float32(ExternalRadius),
-                   numpy.float32(Angle),
-                   numpy.int32(Line),
-                   grid=(int(zImage.shape[0]/Threads/2),1),
-                   block=(Threads,1,1))
-    elif Method=='Original':
-        OriginalCU(zImageCU,fImageCU,
-                   numpy.uint32(zImage.shape[0]/2),
-                   numpy.float32(Mass),
-                   numpy.float32(InternalRadius),
-                   numpy.float32(ExternalRadius),
-                   numpy.float32(Angle),
-                   numpy.int32(Line),
-                   grid=(1,1),
-                   block=(1,1,1))
-    elif Method=='TrajectoCircle':
-        TrajectoryCU(TrajectoriesCU,IdLastCU,
-                     numpy.float32(Mass),
-                     numpy.float32(InternalRadius),
-                     numpy.float32(ExternalRadius),
-                     numpy.float32(Angle),
-                     numpy.int32(Line),
-                     grid=(int(Trajectories.shape[0]/Threads),1),
-                     block=(Threads,1,1))
-        
-        CircleCU(TrajectoriesCU,IdLastCU,zImageCU,fImageCU,
-                 numpy.float32(Mass),
-                 numpy.float32(InternalRadius),
-                 numpy.float32(ExternalRadius),
-                 numpy.float32(Angle),
-                 numpy.int32(Line),
-                 grid=(int(Trajectories.shape[0]/Threads),
-                       int(zImage.shape[0]*4/Threads)),
-                 block=(Threads,Threads,1))
+    start_time = time.time()
+
+    if Method == "EachPixel":
+        EachPixelCU(
+            zImageCU,
+            fImageCU,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads)),
+            block=(Threads, Threads, 1),
+        )
+    elif Method == "EachCircle":
+        EachCircleCU(
+            zImageCU,
+            fImageCU,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(int(zImage.shape[0] / Threads / 2), 1),
+            block=(Threads, 1, 1),
+        )
+    elif Method == "Original":
+        OriginalCU(
+            zImageCU,
+            fImageCU,
+            numpy.uint32(zImage.shape[0] / 2),
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(1, 1),
+            block=(1, 1, 1),
+        )
+    elif Method == "TrajectoCircle":
+        TrajectoryCU(
+            TrajectoriesCU,
+            IdLastCU,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(int(Trajectories.shape[0] / Threads), 1),
+            block=(Threads, 1, 1),
+        )
+
+        CircleCU(
+            TrajectoriesCU,
+            IdLastCU,
+            zImageCU,
+            fImageCU,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(
+                int(Trajectories.shape[0] / Threads),
+                int(zImage.shape[0] * 4 / Threads),
+            ),
+            block=(Threads, Threads, 1),
+        )
     else:
         # Default method: TrajectoPixel
-        TrajectoryCU(TrajectoriesCU,IdLastCU,
-                     numpy.float32(Mass),
-                     numpy.float32(InternalRadius),
-                     numpy.float32(ExternalRadius),
-                     numpy.float32(Angle),
-                     numpy.int32(Line),
-                     grid=(int(Trajectories.shape[0]/Threads),1),
-                     block=(Threads,1,1))
-        
-        PixelCU(zImageCU,fImageCU,TrajectoriesCU,IdLastCU,
-                numpy.uint32(Trajectories.shape[0]),
-                numpy.float32(Mass),
-                numpy.float32(InternalRadius),
-                numpy.float32(ExternalRadius),
-                numpy.float32(Angle),
-                numpy.int32(Line),
-                grid=(int(zImage.shape[0]/Threads),
-                      int(zImage.shape[1]/Threads),1),
-                block=(Threads,Threads,1))
+        TrajectoryCU(
+            TrajectoriesCU,
+            IdLastCU,
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(int(Trajectories.shape[0] / Threads), 1),
+            block=(Threads, 1, 1),
+        )
+
+        PixelCU(
+            zImageCU,
+            fImageCU,
+            TrajectoriesCU,
+            IdLastCU,
+            numpy.uint32(Trajectories.shape[0]),
+            numpy.float32(Mass),
+            numpy.float32(InternalRadius),
+            numpy.float32(ExternalRadius),
+            numpy.float32(Angle),
+            numpy.int32(Line),
+            grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads), 1),
+            block=(Threads, Threads, 1),
+        )
 
     Context.synchronize()
 
-    compute = time.time()-start_time
+    compute = time.time() - start_time
 
-    cuda.memcpy_dtoh(zImage,zImageCU)
-    cuda.memcpy_dtoh(fImage,fImageCU)
-    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
-        cuda.memcpy_dtoh(Trajectories,TrajectoriesCU)
-    elapsed = time.time()-start_time
+    cuda.memcpy_dtoh(zImage, zImageCU)
+    cuda.memcpy_dtoh(fImage, fImageCU)
+    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
+        cuda.memcpy_dtoh(Trajectories, TrajectoriesCU)
+    elapsed = time.time() - start_time
     print("\nCompute Time : %f" % compute)
     print("Elapsed Time : %f\n" % elapsed)
 
-    zMaxPosition=numpy.where(zImage[:,:]==zImage.max())
-    fMaxPosition=numpy.where(fImage[:,:]==fImage.max())
-    print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max())))
-    print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max())))
+    zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
+    fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
+    print(
+        "Z max @(%f,%f) : %f"
+        % (
+            (
+                1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
+                1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
+                zImage.max(),
+            )
+        )
+    )
+    print(
+        "Flux max @(%f,%f) : %f"
+        % (
+            (
+                1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
+                1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
+                fImage.max(),
+            )
+        )
+    )
 
-    
     Context.pop()
 
     Context.detach()
 
-    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
+    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
         if not NoImage:
-            AngleStep=4*numpy.pi/TrackPoints
-            Angles=numpy.arange(0.,4*numpy.pi,AngleStep)
-            Angles.shape=(1,TrackPoints)
-            Hostname=gethostname()
-            Date=time.strftime("%Y%m%d_%H%M%S")
-            ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
-            
+            AngleStep = 4 * numpy.pi / TrackPoints
+            Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
+            Angles.shape = (1, TrackPoints)
+
             # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
             #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
             #               delimiter=' ', fmt='%.2e')
-            numpy.savetxt("TrouNoirTrajectories.csv",
-                          numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
-                          delimiter=' ', fmt='%.2e')
+            numpy.savetxt(
+                "TrouNoirTrajectories.csv",
+                numpy.transpose(numpy.concatenate((Angles, Trajectories), axis=0)),
+                delimiter=" ",
+                fmt="%.2e",
+            )
 
-    
-    return(elapsed)
+    return elapsed
 
-if __name__=='__main__':
+
+if __name__ == "__main__":
 
     # Default device: first one!
-    Device=0
+    Device = 0
     # Default implementation: OpenCL, most versatile!
-    GpuStyle = 'OpenCL'
-    Mass = 1.
+    GpuStyle = "OpenCL"
+    Mass = 1.0
     # Internal Radius 3 times de Schwarzschild Radius
-    InternalRadius=6.*Mass
+    InternalRadius = 6.0 * Mass
     #
-    ExternalRadius=12.
+    ExternalRadius = 12.0
     #
     # Angle with normal to disc 10 degrees
-    Angle = numpy.pi/180.*(90.-10.)
+    Angle = numpy.pi / 180.0 * (90.0 - 10.0)
     # Radiation of disc : BlackBody or Monochromatic
     BlackBody = False
     # Size of image
-    Size=1024
+    Size = 1024
     # Variable Type
-    VariableType='FP32'
+    VariableType = "FP32"
     # ?
-    q=-2
+    q = -2
     # Method of resolution
-    Method='TrajectoPixel'
+    Method = "TrajectoPixel"
     # Colors for output image
-    Colors='Greyscale'
+    Colors = "Greyscale"
     # Physics
-    Physics='Einstein'
+    Physics = "Einstein"
     # No output as image
     NoImage = False
     # Threads in CUDA
     Threads = 32
     # Trackpoints of trajectories
-    TrackPoints=2048
+    TrackPoints = 2048
     # Tracksave of trajectories
-    TrackSave=False
-    
-    HowToUse='%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>'
+    TrackSave = False
+
+    HowToUse = "%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>"  # noqa: E501
 
     try:
-        opts, args = getopt.getopt(sys.argv[1:],"hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",["tracksave","blackbody","noimage","camera","size=","mass=","internal=","external=","angle=","device=","gpustyle=","variabletype=","method=","threads=","colors=","physics=","trackpoints="])
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",
+            [
+                "tracksave",
+                "blackbody",
+                "noimage",
+                "camera",
+                "size=",
+                "mass=",
+                "internal=",
+                "external=",
+                "angle=",
+                "device=",
+                "gpustyle=",
+                "variabletype=",
+                "method=",
+                "threads=",
+                "colors=",
+                "physics=",
+                "trackpoints=",
+            ],
+        )
     except getopt.GetoptError:
         print(HowToUse % sys.argv[0])
         sys.exit(2)
 
     # List of Devices
-    Devices=[]
-    Alu={}
-        
+    Devices = []
+    Alu = {}
+
     for opt, arg in opts:
-        if opt == '-h':
+        if opt == "-h":
             print(HowToUse % sys.argv[0])
-            
+
             print("\nInformations about devices detected under OpenCL API:")
             # For PyOpenCL import
             try:
-                import pyopencl as cl
-                Id=0
+                Id = 0
                 for platform in cl.get_platforms():
                     for device in platform.get_devices():
-                        #deviceType=cl.device_type.to_string(device.type)
-                        deviceType="xPU"
-                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
-                        Id=Id+1
-                        
-            except:
+                        # deviceType=cl.device_type.to_string(device.type)
+                        deviceType = "xPU"
+                        print(
+                            "Device #%i from %s of type %s : %s"
+                            % (
+                                Id,
+                                platform.vendor.lstrip(),
+                                deviceType,
+                                device.name.lstrip(),
+                            )
+                        )
+                        Id = Id + 1
+
+            except Exception:
                 print("Your platform does not seem to support OpenCL")
-                
+
             print("\nInformations about devices detected under CUDA API:")
-                # For PyCUDA import
+            # For PyCUDA import
             try:
                 import pycuda.driver as cuda
+
                 cuda.init()
                 for Id in range(cuda.Device.count()):
-                    device=cuda.Device(Id)
-                    print("Device #%i of type GPU : %s" % (Id,device.name()))
+                    device = cuda.Device(Id)
+                    print("Device #%i of type GPU : %s" % (Id, device.name()))
                 print
-            except:
+            except Exception:
                 print("Your platform does not seem to support CUDA")
-        
+
             sys.exit()
-        
+
         elif opt in ("-d", "--device"):
-#            Devices.append(int(arg))
-            Device=int(arg)
+            #            Devices.append(int(arg))
+            Device = int(arg)
         elif opt in ("-g", "--gpustyle"):
             GpuStyle = arg
         elif opt in ("-v", "--variabletype"):
@@ -2093,7 +2116,7 @@ if __name__=='__main__':
         elif opt in ("-e", "--external"):
             ExternalRadius = float(arg)
         elif opt in ("-a", "--angle"):
-            Angle = numpy.pi/180.*(90.-float(arg))
+            Angle = numpy.pi / 180.0 * (90.0 - float(arg))
         elif opt in ("-b", "--blackbody"):
             BlackBody = True
         elif opt in ("-j", "--tracksave"):
@@ -2124,72 +2147,79 @@ if __name__=='__main__':
     print("Trackpoints of Trajectories : %i" % TrackPoints)
     print("Tracksave of Trajectories : %i" % TrackSave)
 
-    if GpuStyle=='CUDA':
-        print("\nSelection of CUDA device") 
+    if GpuStyle == "CUDA":
+        print("\nSelection of CUDA device")
         try:
             # For PyCUDA import
             import pycuda.driver as cuda
-            
+
             cuda.init()
             for Id in range(cuda.Device.count()):
-                device=cuda.Device(Id)
-                print("Device #%i of type GPU : %s" % (Id,device.name()))
+                device = cuda.Device(Id)
+                print("Device #%i of type GPU : %s" % (Id, device.name()))
                 if Id in Devices:
-                    Alu[Id]='GPU'
-            
+                    Alu[Id] = "GPU"
+
         except ImportError:
             print("Platform does not seem to support CUDA")
 
-    if GpuStyle=='OpenCL':
-        print("\nSelection of OpenCL device") 
+    if GpuStyle == "OpenCL":
+        print("\nSelection of OpenCL device")
         try:
             # For PyOpenCL import
             import pyopencl as cl
-            Id=0
+
+            Id = 0
             for platform in cl.get_platforms():
                 for device in platform.get_devices():
-                    #deviceType=cl.device_type.to_string(device.type)
-                    deviceType="xPU"
-                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))
+                    # deviceType=cl.device_type.to_string(device.type)
+                    deviceType = "xPU"
+                    print(
+                        "Device #%i from %s of type %s : %s"
+                        % (
+                            Id,
+                            platform.vendor.lstrip().rstrip(),
+                            deviceType,
+                            device.name.lstrip().rstrip(),
+                        )
+                    )
 
                     if Id in Devices:
-                    # Set the Alu as detected Device Type
-                        Alu[Id]=deviceType
-                    Id=Id+1
+                        # Set the Alu as detected Device Type
+                        Alu[Id] = deviceType
+                    Id = Id + 1
         except ImportError:
             print("Platform does not seem to support OpenCL")
 
-
-    zImage=numpy.zeros((Size,Size),dtype=numpy.float32)
-    fImage=numpy.zeros((Size,Size),dtype=numpy.float32)
-
-    InputCL={}
-    InputCL['Device']=Device
-    InputCL['GpuStyle']=GpuStyle
-    InputCL['VariableType']=VariableType
-    InputCL['Size']=Size
-    InputCL['Mass']=Mass
-    InputCL['InternalRadius']=InternalRadius
-    InputCL['ExternalRadius']=ExternalRadius
-    InputCL['Angle']=Angle
-    InputCL['BlackBody']=BlackBody
-    InputCL['Method']=Method
-    InputCL['TrackPoints']=TrackPoints
-    InputCL['Physics']=Physics
-    InputCL['Threads']=Threads
-    InputCL['NoImage']=NoImage
-    InputCL['TrackSave']=TrackSave
-
-    if GpuStyle=='OpenCL':
-        duration=BlackHoleCL(zImage,fImage,InputCL)
+    zImage = numpy.zeros((Size, Size), dtype=numpy.float32)
+    fImage = numpy.zeros((Size, Size), dtype=numpy.float32)
+
+    InputCL = {}
+    InputCL["Device"] = Device
+    InputCL["GpuStyle"] = GpuStyle
+    InputCL["VariableType"] = VariableType
+    InputCL["Size"] = Size
+    InputCL["Mass"] = Mass
+    InputCL["InternalRadius"] = InternalRadius
+    InputCL["ExternalRadius"] = ExternalRadius
+    InputCL["Angle"] = Angle
+    InputCL["BlackBody"] = BlackBody
+    InputCL["Method"] = Method
+    InputCL["TrackPoints"] = TrackPoints
+    InputCL["Physics"] = Physics
+    InputCL["Threads"] = Threads
+    InputCL["NoImage"] = NoImage
+    InputCL["TrackSave"] = TrackSave
+
+    if GpuStyle == "OpenCL":
+        duration = BlackHoleCL(zImage, fImage, InputCL)
     else:
-        duration=BlackHoleCUDA(zImage,fImage,InputCL)
-        
-    Hostname=gethostname()
-    Date=time.strftime("%Y%m%d_%H%M%S")
-    ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
-    
-    
+        duration = BlackHoleCUDA(zImage, fImage, InputCL)
+
+    Hostname = gethostname()
+    Date = time.strftime("%Y%m%d_%H%M%S")
+    ImageInfo = "%s_Device%i_%s_%s" % (Method, Device, Hostname, Date)
+
     if not NoImage:
-        ImageOutput(zImage,"TrouNoirZ_%s" % ImageInfo,Colors)
-        ImageOutput(fImage,"TrouNoirF_%s" % ImageInfo,Colors)
+        ImageOutput(zImage, "TrouNoirZ_%s" % ImageInfo, Colors)
+        ImageOutput(fImage, "TrouNoirF_%s" % ImageInfo, Colors)
diff --git a/examples/demo-struct-reduce.py b/examples/demo-struct-reduce.py
index c0c26e34743687a281c534c05d9c8cb74c6587ec..c6369a6a3774d121cdbb6990c03e9d8e0aa1293b 100644
--- a/examples/demo-struct-reduce.py
+++ b/examples/demo-struct-reduce.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pyopencl as cl
 
+
 def make_collector_dtype(device):
     dtype = np.dtype([
         ("cur_min", np.int32),
@@ -16,6 +17,7 @@ def make_collector_dtype(device):
 
     return dtype, c_decl
 
+
 ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)
 
diff --git a/examples/demo_elementwise.py b/examples/demo_elementwise.py
index 3521089210a6dcde9d957443cc909049f39b61c8..ec1a8d5d89eefb808b8f6f8cf757234f0bc42a7d 100644
--- a/examples/demo_elementwise.py
+++ b/examples/demo_elementwise.py
@@ -18,8 +18,7 @@ b_g = cl.array.to_device(queue, b_np)
 lin_comb = ElementwiseKernel(ctx,
     "float k1, float *a_g, float k2, float *b_g, float *res_g",
     "res_g[i] = k1 * a_g[i] + k2 * b_g[i]",
-    "lin_comb"
-)
+    "lin_comb")
 
 res_g = cl.array.empty_like(a_g)
 lin_comb(2, a_g, 3, b_g, res_g)
diff --git a/examples/demo_elementwise_complex.py b/examples/demo_elementwise_complex.py
index 4fe98ec9d0f0d514c84180e2775d84c7f808b152..5f0bfc297047e5aa1fde06d5331bc00330af9e51 100644
--- a/examples/demo_elementwise_complex.py
+++ b/examples/demo_elementwise_complex.py
@@ -8,11 +8,9 @@ queue = cl.CommandQueue(ctx)
 
 n = 10
 a_gpu = cl_array.to_device(queue,
-        ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
-            ).astype(numpy.complex64))
+        (numpy.random.randn(n) + 1j*numpy.random.randn(n)).astype(numpy.complex64))
 b_gpu = cl_array.to_device(queue,
-        ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
-            ).astype(numpy.complex64))
+        (numpy.random.randn(n) + 1j*numpy.random.randn(n)).astype(numpy.complex64))
 
 from pyopencl.elementwise import ElementwiseKernel
 complex_prod = ElementwiseKernel(ctx,
@@ -24,7 +22,7 @@ complex_prod = ElementwiseKernel(ctx,
         "complex_prod",
         preamble="""
         #define complex_ctr(x, y) (float2)(x, y)
-        #define complex_mul(a, b) complex_ctr(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y))
+        #define complex_mul(a, b) complex_ctr(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y))  # noqa: E501
         #define complex_div_scalar(a, b) complex_ctr((a).x / (b), (a).y / (b))
         #define conj(a) complex_ctr((a).x, -(a).y)
         #define conj_transp(a) complex_ctr(-(a).y, (a).x)
diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py
index 2ba293c5dfc3783f449b8bc6e0b060a90a4d0c3e..d27aed437128c818a2049a0feb1a930544b8a557 100644
--- a/examples/demo_meta_codepy.py
+++ b/examples/demo_meta_codepy.py
@@ -20,7 +20,7 @@ b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
 c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
 
 from cgen import FunctionBody, \
-        FunctionDeclaration, Typedef, POD, Value, \
+        FunctionDeclaration, POD, Value, \
         Pointer, Module, Block, Initializer, Assign, Const
 from cgen.opencl import CLKernel, CLGlobal, \
         CLRequiredWorkGroupSize
@@ -53,4 +53,3 @@ c = numpy.empty_like(a)
 cl.enqueue_copy(queue, c, c_buf).wait()
 
 assert la.norm(c-(a+b)) == 0
-
diff --git a/examples/dump-performance.py b/examples/dump-performance.py
index f582cd99fcae98df7325717b4e1541dbf873bbcb..8e5eec501e47dc6a0fa144b7db133690cb7b23b9 100644
--- a/examples/dump-performance.py
+++ b/examples/dump-performance.py
@@ -27,10 +27,12 @@ def main():
         for i in range(6, 31, 2):
             bs = 1 << i
             try:
-                result = "%g GB/s" % (perf.transfer_bandwidth(queue, tx_type, bs)/1e9)
+                result = "%g GB/s" % (
+                        perf.transfer_bandwidth(queue, tx_type, bs)/1e9)
             except Exception as e:
                 result = "exception: %s" % e.__class__.__name__
             print("bandwidth @ %d bytes: %s" % (bs, result))
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/dump-properties.py b/examples/dump-properties.py
index 07d9159827c315605286d46a4f7de494b7d7489e..3ca409b6a780bb17ecf155972fce37708a5f8795 100644
--- a/examples/dump-properties.py
+++ b/examples/dump-properties.py
@@ -14,7 +14,7 @@ def print_info(obj, info_cls):
             info = getattr(info_cls, info_name)
             try:
                 info_value = obj.get_info(info)
-            except:
+            except Exception:
                 info_value = "<error>"
 
             if (info_cls == cl.device_info and info_name == "PARTITION_TYPES_EXT"
@@ -26,9 +26,10 @@ def print_info(obj, info_cls):
             else:
                 try:
                     print(f"{info_name}: {info_value}")
-                except:
+                except Exception:
                     print("%s: <error>" % info_name)
 
+
 for platform in cl.get_platforms():
     print(75*"=")
     print(platform)
@@ -55,7 +56,7 @@ for platform in cl.get_platforms():
                         ]:
                     try:
                         formats = cl.get_supported_image_formats(ctx, mf, itype)
-                    except:
+                    except Exception:
                         formats = "<error>"
                     else:
                         def str_chd_type(chdtype):
diff --git a/examples/median-filter.py b/examples/median-filter.py
index 7f787500ccf82a77b5961413f86e16dbf3cfe8a9..fccb9f583ee95df3e7d6e82f27bcfdd5b4b6d8fa 100644
--- a/examples/median-filter.py
+++ b/examples/median-filter.py
@@ -2,8 +2,8 @@ import pyopencl as cl
 import numpy as np
 from imageio import imread, imsave
 
-#Read in image
-img = imread('noisyImage.jpg').astype(np.float32)
+# Read in image
+img = imread("noisyImage.jpg").astype(np.float32)
 print(img.shape)
 img = np.mean(img, axis=2)
 print(img.shape)
@@ -14,7 +14,7 @@ queue = cl.CommandQueue(ctx)
 mf = cl.mem_flags
 
 # Kernel function
-src = '''
+src = """
 void sort(int *a, int *b, int *c) {
    int swap;
    if(*a > *b) {
@@ -33,7 +33,9 @@ void sort(int *a, int *b, int *c) {
       *c = swap;
    }
 }
-__kernel void medianFilter(__global float *img, __global float *result, __global int *width, __global int *height)
+__kernel void medianFilter(
+    __global float *img, __global float *result, __global int *width, __global
+    int *height)
 {
     int w = *width;
     int h = *height;
@@ -47,7 +49,8 @@ __kernel void medianFilter(__global float *img, __global float *result, __global
     }
     else
     {
-        int pixel00, pixel01, pixel02, pixel10, pixel11, pixel12, pixel20, pixel21, pixel22;
+        int pixel00, pixel01, pixel02, pixel10, pixel11, pixel12, pixel20,
+            pixel21, pixel22;
         pixel00 = img[i - 1 - w];
         pixel01 = img[i- w];
         pixel02 = img[i + 1 - w];
@@ -71,19 +74,23 @@ __kernel void medianFilter(__global float *img, __global float *result, __global
         result[i] = pixel11;
     }
 }
-'''
+"""
 
-#Kernel function instantiation
+# Kernel function instantiation
 prg = cl.Program(ctx, src).build()
-#Allocate memory for variables on the device
-img_g =  cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=img)
+# Allocate memory for variables on the device
+img_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=img)
 result_g = cl.Buffer(ctx, mf.WRITE_ONLY, img.nbytes)
-width_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[1]))
-height_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[0]))
+width_g = cl.Buffer(
+    ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[1])
+)
+height_g = cl.Buffer(
+    ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int32(img.shape[0])
+)
 # Call Kernel. Automatically takes care of block/grid distribution
-prg.medianFilter(queue, img.shape, None , img_g, result_g, width_g, height_g)
+prg.medianFilter(queue, img.shape, None, img_g, result_g, width_g, height_g)
 result = np.empty_like(img)
 cl.enqueue_copy(queue, result, result_g)
 
 # Show the blurred image
-imsave('medianFilter-OpenCL.jpg', result)
+imsave("medianFilter-OpenCL.jpg", result)
diff --git a/examples/n-body.py b/examples/n-body.py
old mode 100755
new mode 100644
index 4c207b76b23401d439ab356f862a029c1a84392f..ec8a3369c693ce4b427a78ce00a2c4db852a42de
--- a/examples/n-body.py
+++ b/examples/n-body.py
@@ -7,30 +7,31 @@ By default, rendering in OpenGL is disabled. Add -g option to activate.
 
 Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
 
-CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
+CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
 Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
 
 Thanks to Andreas Klockner for PyOpenCL:
 http://mathema.tician.de/software/pyopencl
- 
+
 """
 import getopt
 import sys
 import time
 import numpy as np
 import pyopencl as cl
-import pyopencl.array as cl_array
+import pyopencl.array
 from numpy.random import randint as nprnd
-import string, sys
+
 
 def DictionariesAPI():
-    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
-    Computing={'FP32':0,'FP64':1}
-    Interaction={'Force':0,'Potential':1}
-    Artevasion={'None':0,'NegExp':1,'CorRad':2}
-    return(Marsaglia,Computing,Interaction,Artevasion)
+    Marsaglia = {"CONG": 0, "SHR3": 1, "MWC": 2, "KISS": 3}
+    Computing = {"FP32": 0, "FP64": 1}
+    Interaction = {"Force": 0, "Potential": 1}
+    Artevasion = {"None": 0, "NegExp": 1, "CorRad": 2}
+    return (Marsaglia, Computing, Interaction, Artevasion)
 
-BlobOpenCL= """
+
+BlobOpenCL = """
 #define TFP32 0
 #define TFP64 1
 
@@ -114,14 +115,14 @@ MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
 MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
 #if INTERACTION == TFORCE
 #if ARTEVASION == NEGEXP
-// Force gradient of potential, set as (1-exp(-r))/r 
+// Force gradient of potential, set as (1-exp(-r))/r
 {
     private MYFLOAT r=MyDistance(n,m);
     private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
     return((n-m)*num/(MYFLOAT)(r*r*r));
 }
 #elif ARTEVASION == CORRAD
-// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2) 
+// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
 {
     private MYFLOAT r=MyDistance(n,m);
     private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
@@ -152,8 +153,8 @@ MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
 MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
 {
     private MYFLOAT potential=(MYFLOAT)0.e0f;
-    private MYFLOAT4 x=clDataX[gid]; 
-    
+    private MYFLOAT4 x=clDataX[gid];
+
     for (int i=0;i<get_global_size(0);i++)
     {
         if (gid != i)
@@ -164,7 +165,7 @@ MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
     return(potential);
 }
 
-MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
+MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid) // # noqa: E501
 {
     return(PairPotential(clDataX[gid],clCoM[0]));
 }
@@ -179,14 +180,14 @@ MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clData
     a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
     v0=(MYFLOAT4)clDataInV[gid];
     x0=(MYFLOAT4)clDataInX[gid];
-    int N = get_global_size(0);    
-    
+    int N = get_global_size(0);
+
     for (private int i=0;i<N;i++)
     {
         if (gid != i)
         a0+=Interaction(x0,clDataInX[i]);
     }
-        
+
     a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
     v1=a0*dt+v0;
     x1=v0*dt+x0;
@@ -204,7 +205,7 @@ MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clData
         if (gid != k)
         a2+=Interaction(x2,clDataInX[k]);
     }
-    
+
     a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
     v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
     x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
@@ -213,7 +214,7 @@ MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clData
         if (gid != l)
         a3+=Interaction(x3,clDataInX[l]);
     }
-    
+
     a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
     v4=a3*dt+v0;
     x4=v3*dt+x0;
@@ -222,10 +223,10 @@ MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clData
         if (gid != m)
         a4+=Interaction(x4,clDataInX[m]);
     }
-    
+
     xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
     vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
-     
+
     return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
 }
 
@@ -275,7 +276,7 @@ MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clD
         if (gid != i)
           a+=Interaction(x0,clDataInX[i]);
     }
-       
+
     vf=v0+dt*a;
     xf=x0+dt*vf;
 
@@ -295,21 +296,21 @@ MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clD
         if (gid != i)
         a+=Interaction(x0,clDataInX[i]);
     }
-       
+
     vf=v0+dt*a;
     xf=x0+dt*v0;
- 
+
     return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
 }
 
-__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX, 
+__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
                                    MYFLOAT diameter,uint seed_z,uint seed_w)
 {
     private int gid=get_global_id(0);
     private uint zmwc=seed_z+gid;
     private uint wmwc=seed_w+(gid+1)%2;
     private MYFLOAT Heat;
- 
+
     for (int i=0;i<gid;i++)
     {
         Heat=MWCfp;
@@ -335,19 +336,19 @@ __kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
        Length=(MYFLOAT)length((MYFLOAT4)Position);
     }
 
-    clDataX[gid]=Position;    
+    clDataX[gid]=Position;
 
     barrier(CLK_GLOBAL_MEM_FENCE);
 }
 
-__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
+__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
                              uint seed_z,uint seed_w)
 {
     int gid=get_global_id(0);
     uint zmwc=seed_z+gid;
     uint wmwc=seed_w-gid;
     private MYFLOAT Heat;
- 
+
     for (int i=0;i<gid;i++)
     {
         Heat=MWCfp;
@@ -417,7 +418,7 @@ __kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,M
 {
     private int gid = get_global_id(0);
     private MYFLOAT8 clDataGid;
-    
+
     clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
     barrier(CLK_GLOBAL_MEM_FENCE);
     clDataX[gid]=clDataGid.s0123;
@@ -428,7 +429,7 @@ __kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT
 {
     private int gid = get_global_id(0);
     private MYFLOAT8 clDataGid;
-    
+
     clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
     barrier(CLK_GLOBAL_MEM_FENCE);
     clDataX[gid]=clDataGid.s0123;
@@ -439,7 +440,7 @@ __kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clData
 {
     private int gid = get_global_id(0);
     private MYFLOAT8 clDataGid;
-    
+
     clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
     barrier(CLK_GLOBAL_MEM_FENCE);
     clDataX[gid]=clDataGid.s0123;
@@ -449,7 +450,7 @@ __kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clData
 __kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
 {
     private int gid = get_global_id(0);
-    private MYFLOAT8 clDataGid;    
+    private MYFLOAT8 clDataGid;
 
     clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
     barrier(CLK_GLOBAL_MEM_FENCE);
@@ -469,21 +470,21 @@ __kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential
     int gid = get_global_id(0);
 
     MYFLOAT potential=(MYFLOAT)0.e0f;
-    MYFLOAT4 x=clDataX[gid]; 
-    
+    MYFLOAT4 x=clDataX[gid];
+
     for (int i=0;i<get_global_size(0);i++)
     {
         if (gid != i)
         potential+=PairPotential(x,clDataX[i]);
     }
-                 
+
     barrier(CLK_GLOBAL_MEM_FENCE);
     clPotential[gid]=potential*(MYFLOAT)5.e-1f;
 }
 
 __kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
 {
-    MYFLOAT4 CoM=clDataX[0]; 
+    MYFLOAT4 CoM=clDataX[0];
 
     for (int i=1;i<Size;i++)
     {
@@ -497,7 +498,7 @@ __kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,i
 __kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
 {
     int gid = get_global_id(0);
-    
+
     barrier(CLK_GLOBAL_MEM_FENCE);
     MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
     clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
@@ -505,63 +506,73 @@ __kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
 
 """
 
-def MainOpenCL(clDataX,clDataV,Step,Method):
-    time_start=time.time()
-    if Method=="RungeKutta":
-        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
-    elif Method=="ExplicitEuler":
-        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
-    elif Method=="Heun":
-        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
+
+def MainOpenCL(clDataX, clDataV, Step, Method):
+    time_start = time.time()
+    if Method == "RungeKutta":
+        CLLaunch = MyRoutines.RungeKutta(
+            queue, (Number, 1), None, clDataX, clDataV, Step
+        )
+    elif Method == "ExplicitEuler":
+        CLLaunch = MyRoutines.ExplicitEuler(
+            queue, (Number, 1), None, clDataX, clDataV, Step
+        )
+    elif Method == "Heun":
+        CLLaunch = MyRoutines.Heun(queue, (Number, 1), None, clDataX, clDataV, Step)
     else:
-        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
+        CLLaunch = MyRoutines.ImplicitEuler(
+            queue, (Number, 1), None, clDataX, clDataV, Step
+        )
     CLLaunch.wait()
-    Elapsed=time.time()-time_start
-    return(Elapsed)
-    
+    Elapsed = time.time() - time_start
+    return Elapsed
+
+
 def display(*args):
+    global MyDataX, MyDataV, clDataX, clDataV, Step, Method, Number, Iterations, \
+            Durations, Verbose, SpeedRendering
+
+    gl.glClearColor(0.0, 0.0, 0.0, 0.0)
+    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
+    gl.glColor3f(1.0, 1.0, 1.0)
 
-    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
-    
-    glClearColor(0.0, 0.0, 0.0, 0.0)
-    glClear(GL_COLOR_BUFFER_BIT)
-    glColor3f(1.0,1.0,1.0)
-    
-    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
+    MainOpenCL(clDataX, clDataV, Step, Method)
     if SpeedRendering:
         cl.enqueue_copy(queue, MyDataV, clDataV)
-        MyDataV.reshape(Number,4)[:,3]=1
-        glVertexPointerf(MyDataV.reshape(Number,4))
+        MyDataV.reshape(Number, 4)[:, 3] = 1
+        gl.glVertexPointerf(MyDataV.reshape(Number, 4))
     else:
         cl.enqueue_copy(queue, MyDataX, clDataX)
-        MyDataX.reshape(Number,4)[:,3]=1
-        glVertexPointerf(MyDataX.reshape(Number,4))
+        MyDataX.reshape(Number, 4)[:, 3] = 1
+        gl.glVertexPointerf(MyDataX.reshape(Number, 4))
 
     if Verbose:
-        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
+        print("Positions for #%s iteration: %s" % (Iterations, MyDataX))
     else:
-        sys.stdout.write('.')
+        sys.stdout.write(".")
         sys.stdout.flush()
-    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))    
-    glEnableClientState(GL_VERTEX_ARRAY)
-    glDrawArrays(GL_POINTS, 0, Number)
-    glDisableClientState(GL_VERTEX_ARRAY)
-    glFlush()
-    Iterations+=1
-    glutSwapBuffers()
+    Durations = np.append(Durations, MainOpenCL(clDataX, clDataV, Step, Method))
+    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
+    gl.glDrawArrays(gl.GL_POINTS, 0, Number)
+    gl.glDisableClientState(gl.GL_VERTEX_ARRAY)
+    gl.glFlush()
+    Iterations += 1
+    glut.glutSwapBuffers()
+
 
 def halt():
     pass
 
-def keyboard(k,x,y):
-    global ViewRZ,SpeedRendering
-    LC_Z = as_8_bit( 'z' )
-    UC_Z = as_8_bit( 'Z' )
-    Plus = as_8_bit( '+' )
-    Minus = as_8_bit( '-' )
-    Switch = as_8_bit( 's' )
 
-    Zoom=1
+def keyboard(k, x, y):
+    global ViewRZ, SpeedRendering
+    LC_Z = glut.as_8_bit("z")
+    UC_Z = glut.as_8_bit("Z")
+    Plus = glut.as_8_bit("+")
+    Minus = glut.as_8_bit("-")
+    Switch = glut.as_8_bit("s")
+
+    Zoom = 1
     if k == LC_Z:
         ViewRZ += 1.0
     elif k == UC_Z:
@@ -572,161 +583,203 @@ def keyboard(k,x,y):
         Zoom /= 2.0
     elif k == Switch:
         if SpeedRendering:
-            SpeedRendering=False
+            SpeedRendering = False
         else:
-            SpeedRendering=True
-    elif ord(k) == 27: # Escape
-        glutLeaveMainLoop()
-        return(False)
+            SpeedRendering = True
+    elif ord(k) == 27:  # Escape
+        glut.glutLeaveMainLoop()
+        return False
     else:
         return
-    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
-    glScalef(Zoom,Zoom,Zoom)
-    glutPostRedisplay()
+    gl.glRotatef(ViewRZ, 0.0, 0.0, 1.0)
+    gl.glScalef(Zoom, Zoom, Zoom)
+    glut.glutPostRedisplay()
+
 
-def special(k,x,y):
+def special(k, x, y):
     global ViewRX, ViewRY
 
-    Step=1.
-    if k == GLUT_KEY_UP:
+    Step = 1.0
+    if k == glut.GLUT_KEY_UP:
         ViewRX += Step
-    elif k == GLUT_KEY_DOWN:
+    elif k == glut.GLUT_KEY_DOWN:
         ViewRX -= Step
-    elif k == GLUT_KEY_LEFT:
+    elif k == glut.GLUT_KEY_LEFT:
         ViewRY += Step
-    elif k == GLUT_KEY_RIGHT:
+    elif k == glut.GLUT_KEY_RIGHT:
         ViewRY -= Step
     else:
         return
-    glRotatef(ViewRX, 1.0, 0.0, 0.0)
-    glRotatef(ViewRY, 0.0, 1.0, 0.0)
-    glutPostRedisplay()
+    gl.glRotatef(ViewRX, 1.0, 0.0, 0.0)
+    gl.glRotatef(ViewRY, 0.0, 1.0, 0.0)
+    glut.glutPostRedisplay()
+
 
 def setup_viewport():
     global SizeOfBox
-    glMatrixMode(GL_PROJECTION)
-    glLoadIdentity()
-    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
-    glutPostRedisplay()
-    
+    gl.glMatrixMode(gl.GL_PROJECTION)
+    gl.glLoadIdentity()
+    gl.glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
+    glut.glutPostRedisplay()
+
+
 def reshape(w, h):
-    glViewport(0, 0, w, h)
+    gl.glViewport(0, 0, w, h)
     setup_viewport()
 
-if __name__=='__main__':
 
-    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
-    
+if __name__ == "__main__":
+
+    global Number, Step, clDataX, clDataV, MyDataX, MyDataV, Method, SizeOfBox, \
+            Iterations, Verbose, Durations
+
     # ValueType
-    ValueType='FP32'
-    class MyFloat(np.float32):pass
+    ValueType = "FP32"
+
+    class MyFloat(np.float32):
+        pass
+
     #    clType8=cl_array.vec.float8
     # Set defaults values
-    np.set_printoptions(precision=2)  
+    np.set_printoptions(precision=2)
     # Id of Device : 1 is for first find !
-    Device=0
+    Device = 0
     # Number of bodies is integer
-    Number=2
+    Number = 2
     # Number of iterations (for standalone execution)
-    Iterations=10
+    Iterations = 10
     # Size of shape
-    SizeOfShape=MyFloat(1.)
+    SizeOfShape = MyFloat(1.0)
     # Initial velocity of particules
-    Velocity=MyFloat(1.)
+    Velocity = MyFloat(1.0)
     # Step
-    Step=MyFloat(1./32)
+    Step = MyFloat(1.0 / 32)
     # Method of integration
-    Method='ImplicitEuler'
+    Method = "ImplicitEuler"
     # InitialRandom
-    InitialRandom=False
+    InitialRandom = False
     # RNG Marsaglia Method
-    RNG='MWC'
+    RNG = "MWC"
     # Viriel Distribution of stress
-    VirielStress=True
+    VirielStress = True
     # Verbose
-    Verbose=False
+    Verbose = False
     # OpenGL real time rendering
-    OpenGL=False
+    OpenGL = False
     # Speed rendering
-    SpeedRendering=False
+    SpeedRendering = False
     # Counter ArtEvasions Measures (artefact evasion)
-    CoArEv='None'
+    CoArEv = "None"
     # Shape to distribute
-    Shape='Ball'
+    Shape = "Ball"
     # Type of Interaction
-    InterType='Force'
-    
-    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
+    InterType = "Force"
+
+    HowToUse = "%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>"  # noqa: E501
 
     try:
-        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "rpgehod:n:i:z:v:s:m:t:b:x:",
+            [
+                "random",
+                "potential",
+                "coarev",
+                "opengl",
+                "viriel",
+                "verbose",
+                "device=",
+                "number=",
+                "iterations=",
+                "size=",
+                "velocity=",
+                "step=",
+                "method=",
+                "valuetype=",
+                "shape=",
+            ],
+        )
     except getopt.GetoptError:
         print(HowToUse % sys.argv[0])
         sys.exit(2)
 
     for opt, arg in opts:
-        if opt == '-h':
+        if opt == "-h":
             print(HowToUse % sys.argv[0])
 
             print("\nInformations about devices detected under OpenCL:")
             try:
-                Id=0
+                Id = 0
                 for platform in cl.get_platforms():
                     for device in platform.get_devices():
                         # Failed now because of POCL implementation
-                        #deviceType=cl.device_type.to_string(device.type)
-                        deviceType="xPU"
-                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
-                        Id=Id+1
+                        # deviceType=cl.device_type.to_string(device.type)
+                        deviceType = "xPU"
+                        print(
+                            "Device #%i from %s of type %s : %s"
+                            % (
+                                Id,
+                                platform.vendor.lstrip(),
+                                deviceType,
+                                device.name.lstrip(),
+                            )
+                        )
+                        Id = Id + 1
                 sys.exit()
             except ImportError:
                 print("Your platform does not seem to support OpenCL")
                 sys.exit()
 
         elif opt in ("-t", "--valuetype"):
-            if arg=='FP64':
-                class MyFloat(np.float64): pass
+            if arg == "FP64":
+
+                class MyFloat(np.float64):
+                    pass
+
             else:
-                class MyFloat(np.float32):pass
+
+                class MyFloat(np.float32):
+                    pass
+
             ValueType = arg
         elif opt in ("-d", "--device"):
-            Device=int(arg)
+            Device = int(arg)
         elif opt in ("-m", "--method"):
-            Method=arg
+            Method = arg
         elif opt in ("-b", "--shape"):
-            Shape=arg
-            if Shape!='Ball' or Shape!='Box':
-                print('Wrong argument: set to Ball')
+            Shape = arg
+            if Shape != "Ball" or Shape != "Box":
+                print("Wrong argument: set to Ball")
         elif opt in ("-n", "--number"):
-            Number=int(arg)
+            Number = int(arg)
         elif opt in ("-i", "--iterations"):
-            Iterations=int(arg)
+            Iterations = int(arg)
         elif opt in ("-z", "--size"):
-            SizeOfShape=MyFloat(arg)
+            SizeOfShape = MyFloat(arg)
         elif opt in ("-v", "--velocity"):
-            Velocity=MyFloat(arg)
-            VirielStress=False
+            Velocity = MyFloat(arg)
+            VirielStress = False
         elif opt in ("-s", "--step"):
-            Step=MyFloat(arg)
+            Step = MyFloat(arg)
         elif opt in ("-r", "--random"):
-            InitialRandom=True
+            InitialRandom = True
         elif opt in ("-c", "--check"):
-            CheckEnergies=True
+            CheckEnergies = True
         elif opt in ("-e", "--viriel"):
-            VirielStress=True
+            VirielStress = True
         elif opt in ("-g", "--opengl"):
-            OpenGL=True
+            OpenGL = True
         elif opt in ("-p", "--potential"):
-            InterType='Potential'
+            InterType = "Potential"
         elif opt in ("-x", "--coarev"):
-            CoArEv=arg
+            CoArEv = arg
         elif opt in ("-o", "--verbose"):
-            Verbose=True
+            Verbose = True
+
+    SizeOfShape = np.sqrt(MyFloat(SizeOfShape * Number))
+    Velocity = MyFloat(Velocity)
+    Step = MyFloat(Step)
 
-    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
-    Velocity=MyFloat(Velocity)
-    Step=MyFloat(Step)
-                
     print("Device choosed : %s" % Device)
     print("Number of particules : %s" % Number)
     print("Size of Shape : %s" % SizeOfShape)
@@ -742,49 +795,63 @@ if __name__=='__main__':
     print("Interaction type : %s" % InterType)
     print("Counter Artevasion type : %s" % CoArEv)
 
-    # Create Numpy array of CL vector with 8 FP32    
-    MyCoM = np.zeros(4,dtype=MyFloat)
-    MyDataX = np.zeros(Number*4, dtype=MyFloat)
-    MyDataV = np.zeros(Number*4, dtype=MyFloat)
+    # Create Numpy array of CL vector with 8 FP32
+    MyCoM = np.zeros(4, dtype=MyFloat)
+    MyDataX = np.zeros(Number * 4, dtype=MyFloat)
+    MyDataV = np.zeros(Number * 4, dtype=MyFloat)
     MyPotential = np.zeros(Number, dtype=MyFloat)
     MyKinetic = np.zeros(Number, dtype=MyFloat)
 
-    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
+    Marsaglia, Computing, Interaction, Artevasion = DictionariesAPI()
 
     # Scan the OpenCL arrays
-    Id=0
-    HasXPU=False
+    Id = 0
+    HasXPU = False
     for platform in cl.get_platforms():
         for device in platform.get_devices():
-            if Id==Device:
-                PlatForm=platform
-                XPU=device
-                print("CPU/GPU selected: ",device.name.lstrip())
-                print("Platform selected: ",platform.name)
-                HasXPU=True
-            Id+=1
-
-    if HasXPU==False:
-        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
-        sys.exit()      
+            if Id == Device:
+                PlatForm = platform
+                XPU = device
+                print("CPU/GPU selected: ", device.name.lstrip())
+                print("Platform selected: ", platform.name)
+                HasXPU = True
+            Id += 1
+
+    if not HasXPU:
+        print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
+        sys.exit()
 
     # Create Context
     try:
         ctx = cl.Context([XPU])
-        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
-    except:
+        queue = cl.CommandQueue(
+            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
+        )
+    except Exception:
         print("Crash during context creation")
 
     # Build all routines used for the computing
 
-    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
-    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
-
-    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
-        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
+    # BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])  # noqa: E501
+    BuildOptions = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (  # noqa: E501
+        Marsaglia[RNG],
+        Computing[ValueType],
+        Interaction[InterType],
+        Artevasion[CoArEv],
+    )
+
+    if (
+        "Intel" in PlatForm.name
+        or "Experimental" in PlatForm.name
+        or "Clover" in PlatForm.name
+        or "Portable" in PlatForm.name
+    ):
+        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)
     else:
-        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
-        
+        MyRoutines = cl.Program(ctx, BlobOpenCL).build(
+            options=BuildOptions + " -cl-strict-aliasing"
+        )
+
     mf = cl.mem_flags
     # Read/Write approach for buffering
     clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
@@ -792,130 +859,210 @@ if __name__=='__main__':
     clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
     clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
     clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
-    
+
     # Write/HostPointer approach for buffering
     # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
     # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
-    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
+    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)  # noqa: E501
     # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
     # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
 
-    print('All particles superimposed.')
+    print("All particles superimposed.")
 
     # Set particles to RNG points
     if InitialRandom:
-        seed_w=np.uint32(nprnd(2**32))
-        seed_z=np.uint32(nprnd(2**32))
+        seed_w = np.uint32(nprnd(2 ** 32))
+        seed_z = np.uint32(nprnd(2 ** 32))
     else:
-        seed_w=np.uint32(19710211)
-        seed_z=np.uint32(20081010)
-            
-    if Shape=='Ball':
-        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
+        seed_w = np.uint32(19710211)
+        seed_z = np.uint32(20081010)
+
+    if Shape == "Ball":
+        MyRoutines.InBallSplutterPoints(
+            queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
+        )
     else:
-        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
+        MyRoutines.InBoxSplutterPoints(
+            queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
+        )
 
-    print('All particules distributed')
+    print("All particules distributed")
 
-    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
+    CLLaunch = MyRoutines.CenterOfMass(
+        queue, (1, 1), None, clDataX, clCoM, np.int32(Number)
+    )
     CLLaunch.wait()
-    cl.enqueue_copy(queue,MyCoM,clCoM)
-    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
-    
+    cl.enqueue_copy(queue, MyCoM, clCoM)
+    print("Center Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
+
     if VirielStress:
-        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
+        CLLaunch = MyRoutines.SplutterStress(
+            queue,
+            (Number, 1),
+            None,
+            clDataX,
+            clDataV,
+            clCoM,
+            MyFloat(0.0),
+            np.uint32(110271),
+            np.uint32(250173),
+        )
     else:
-        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
+        CLLaunch = MyRoutines.SplutterStress(
+            queue,
+            (Number, 1),
+            None,
+            clDataX,
+            clDataV,
+            clCoM,
+            Velocity,
+            np.uint32(110271),
+            np.uint32(250173),
+        )
     CLLaunch.wait()
 
-    print('All particules stressed')
+    print("All particules stressed")
 
-    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
-    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
+    CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
+    CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
     CLLaunch.wait()
-    cl.enqueue_copy(queue,MyPotential,clPotential)
-    cl.enqueue_copy(queue,MyKinetic,clKinetic)
-    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
+    cl.enqueue_copy(queue, MyPotential, clPotential)
+    cl.enqueue_copy(queue, MyKinetic, clKinetic)
+    print(
+        "Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
+        % (
+            np.sum(MyPotential) + 2 * np.sum(MyKinetic),
+            np.sum(MyPotential),
+            np.sum(MyKinetic),
+        )
+    )
 
     if SpeedRendering:
-        SizeOfBox=max(2*MyKinetic)
+        SizeOfBox = max(2 * MyKinetic)
     else:
-        SizeOfBox=SizeOfShape        
-    
+        SizeOfBox = SizeOfShape
+
     if OpenGL:
-        print('\tTiny documentation to interact OpenGL rendering:\n')
-        print('\t<Left|Right> Rotate around X axis')
-        print('\t  <Up|Down>  Rotate around Y axis')
-        print('\t   <z|Z>     Rotate around Z axis')
-        print('\t   <-|+>     Unzoom/Zoom')
-        print('\t    <s>      Toggle to display Positions or Velocities')
-        print('\t   <Esc>     Quit\n')
-    
-    wall_time_start=time.time()
-
-    Durations=np.array([],dtype=MyFloat)
-    print('Starting!')
+        print("\tTiny documentation to interact OpenGL rendering:\n")
+        print("\t<Left|Right> Rotate around X axis")
+        print("\t  <Up|Down>  Rotate around Y axis")
+        print("\t   <z|Z>     Rotate around Z axis")
+        print("\t   <-|+>     Unzoom/Zoom")
+        print("\t    <s>      Toggle to display Positions or Velocities")
+        print("\t   <Esc>     Quit\n")
+
+    wall_time_start = time.time()
+
+    Durations = np.array([], dtype=MyFloat)
+    print("Starting!")
     if OpenGL:
-        from OpenGL.GL import *
-        from OpenGL.GLUT import *
-        global ViewRX,ViewRY,ViewRZ
-        Iterations=0
-        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
+        import OpenGL.GL as gl
+        import OpenGL.GLUT as glut
+
+        global ViewRX, ViewRY, ViewRZ
+        Iterations = 0
+        ViewRX, ViewRY, ViewRZ = 0.0, 0.0, 0.0
         # Launch OpenGL Loop
-        glutInit(sys.argv)
-        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
-        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
-        glutInitWindowSize(512,512)
-        glutCreateWindow(b'NBodyGL')
+        glut.glutInit(sys.argv)
+        glut.glutInitDisplayMode(glut.GLUT_DOUBLE | glut.GLUT_RGB)
+        glut.glutSetOption(glut.GLUT_ACTION_ON_WINDOW_CLOSE,
+                glut.GLUT_ACTION_CONTINUE_EXECUTION)
+        glut.glutInitWindowSize(512, 512)
+        glut.glutCreateWindow(b"NBodyGL")
         setup_viewport()
-        glutReshapeFunc(reshape)
-        glutDisplayFunc(display)
-        glutIdleFunc(display)
+        glut.glutReshapeFunc(reshape)
+        glut.glutDisplayFunc(display)
+        glut.glutIdleFunc(display)
         #   glutMouseFunc(mouse)
-        glutSpecialFunc(special)
-        glutKeyboardFunc(keyboard)
-        glutMainLoop()
+        glut.glutSpecialFunc(special)
+        glut.glutKeyboardFunc(keyboard)
+        glut.glutMainLoop()
     else:
         for iteration in range(Iterations):
-            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
+            Elapsed = MainOpenCL(clDataX, clDataV, Step, Method)
             if Verbose:
                 # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
                 cl.enqueue_copy(queue, MyDataX, clDataX)
-                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
+                print("Positions for #%s iteration: %s" % (iteration, MyDataX))
             else:
-                sys.stdout.write('.')
+                sys.stdout.write(".")
                 sys.stdout.flush()
-            Durations=np.append(Durations,Elapsed)
+            Durations = np.append(Durations, Elapsed)
 
-    print('\nEnding!')
-            
-    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
-    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
-    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
-    CLLaunch.wait()
-    cl.enqueue_copy(queue,MyCoM,clCoM)
-    cl.enqueue_copy(queue,MyPotential,clPotential)
-    cl.enqueue_copy(queue,MyKinetic,clKinetic)
-    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
-    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
+    print("\nEnding!")
 
-    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
+    MyRoutines.CenterOfMass(queue, (1, 1), None, clDataX, clCoM, np.int32(Number))
+    CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
+    CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
+    CLLaunch.wait()
+    cl.enqueue_copy(queue, MyCoM, clCoM)
+    cl.enqueue_copy(queue, MyPotential, clPotential)
+    cl.enqueue_copy(queue, MyKinetic, clKinetic)
+    print("\nCenter Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
+    print(
+        "Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
+        % (
+            np.sum(MyPotential) + 2.0 * np.sum(MyKinetic),
+            np.sum(MyPotential),
+            np.sum(MyKinetic),
+        )
+    )
+
+    print(
+        "Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n"  # noqa: E501
+        % (
+            Device,
+            Iterations,
+            np.mean(Durations),
+            np.median(Durations),
+            np.std(Durations),
+            np.min(Durations),
+            np.max(Durations),
+            np.std(Durations) / np.median(Durations),
+        )
+    )
 
     # FPS: 1/Elapsed
-    FPS=np.ones(len(Durations))
-    FPS/=Durations
-
-    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
+    FPS = np.ones(len(Durations))
+    FPS /= Durations
+
+    print(
+        "FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n"  # noqa: E501
+        % (
+            Device,
+            Iterations,
+            np.mean(FPS),
+            np.median(FPS),
+            np.std(FPS),
+            np.min(FPS),
+            np.max(FPS),
+        )
+    )
 
     # Contraction of Square*Size*Hertz: Size*Size/Elapsed
-    Squertz=np.ones(len(Durations))
-    Squertz*=Number*Number
-    Squertz/=Durations
+    Squertz = np.ones(len(Durations))
+    Squertz *= Number * Number
+    Squertz /= Durations
+
+    print(
+        "Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n"  # noqa: E501
+        % (
+            Device,
+            Iterations,
+            np.log10(np.mean(Squertz)),
+            np.mean(Squertz),
+            np.log10(np.median(Squertz)),
+            np.median(Squertz),
+            np.log10(np.std(Squertz)),
+            np.std(Squertz),
+            np.log10(np.min(Squertz)),
+            np.min(Squertz),
+            np.log10(np.max(Squertz)),
+            np.max(Squertz),
+        )
+    )
 
-    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
-        
     clDataX.release()
     clDataV.release()
     clKinetic.release()
     clPotential.release()
-
diff --git a/examples/narray.py b/examples/narray.py
index 924c0d69cd89754574b68939c403c92822c5aa07..2ea00fa7f7eab024dc18b314f7dadb70015509a1 100644
--- a/examples/narray.py
+++ b/examples/narray.py
@@ -2,7 +2,7 @@
 import pyopencl as cl
 import numpy as np
 
-demo_r = np.empty( (500,5), dtype=np.uint32)
+demo_r = np.empty((500, 5), dtype=np.uint32)
 ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)
 
@@ -23,7 +23,7 @@ __kernel void demo(__global uint *demo)
 
 try:
     prg.build()
-except:
+except Exception:
     print("Error:")
     print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
     raise
@@ -33,4 +33,3 @@ cl.enqueue_copy(queue, demo_r, demo_buf).wait()
 
 for res in demo_r:
     print(res)
-
diff --git a/examples/pi-monte-carlo.py b/examples/pi-monte-carlo.py
old mode 100755
new mode 100644
index 1fc0a91e22181a40ef87ab577bec5118951601c7..dcba18f5ef1d16beaea3ed21b2c0dec38a4dce9e
--- a/examples/pi-monte-carlo.py
+++ b/examples/pi-monte-carlo.py
@@ -9,7 +9,7 @@
 #
 # use -h for complete set of options
 #
-# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
+# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
 # Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
 #
 
@@ -17,7 +17,7 @@
 # http://mathema.tician.de/software/pycuda
 # Thanks to Andreas Klockner for PyOpenCL:
 # http://mathema.tician.de/software/pyopencl
-# 
+#
 
 # 2013-01-01 : problems with launch timeout
 # http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
@@ -25,107 +25,119 @@
 
 # Common tools
 import numpy
-from numpy.random import randint as nprnd
 import sys
 import getopt
 import time
 import itertools
 from socket import gethostname
 
+
 class PenStacle:
     """Pentacle of Statistics from data"""
-    Avg=0
-    Med=0
-    Std=0
-    Min=0
-    Max=0
-    def __init__(self,Data):
-        self.Avg=numpy.average(Data)
-        self.Med=numpy.median(Data)
-        self.Std=numpy.std(Data)
-        self.Max=numpy.max(Data)
-        self.Min=numpy.min(Data)
+
+    Avg = 0
+    Med = 0
+    Std = 0
+    Min = 0
+    Max = 0
+
+    def __init__(self, Data):
+        self.Avg = numpy.average(Data)
+        self.Med = numpy.median(Data)
+        self.Std = numpy.std(Data)
+        self.Max = numpy.max(Data)
+        self.Min = numpy.min(Data)
+
     def display(self):
-        print("%s %s %s %s %s" % (self.Avg,self.Med,self.Std,self.Min,self.Max))
+        print("%s %s %s %s %s" % (self.Avg, self.Med, self.Std, self.Min, self.Max))
+
 
 class Experience:
     """Metrology for experiences"""
-    DeviceStyle=''
-    DeviceId=0
-    AvgD=0
-    MedD=0
-    StdD=0
-    MinD=0
-    MaxD=0
-    AvgR=0
-    MedR=0
-    StdR=0
-    MinR=0
-    MaxR=0
-    def __init__(self,DeviceStyle,DeviceId,Iterations):
-        self.DeviceStyle=DeviceStyle
-        self.DeviceId=DeviceId
+
+    DeviceStyle = ""
+    DeviceId = 0
+    AvgD = 0
+    MedD = 0
+    StdD = 0
+    MinD = 0
+    MaxD = 0
+    AvgR = 0
+    MedR = 0
+    StdR = 0
+    MinR = 0
+    MaxR = 0
+
+    def __init__(self, DeviceStyle, DeviceId, Iterations):
+        self.DeviceStyle = DeviceStyle
+        self.DeviceId = DeviceId
         self.Iterations
-        
-    def Metrology(self,Data):
-        Duration=PenStacle(Data)
-        Rate=PenStacle(Iterations/Data)
+
+    def Metrology(self, Data):
+        Duration = PenStacle(Data)
+        Rate = PenStacle(Iterations / Data)
         print("Duration %s" % Duration)
         print("Rate %s" % Rate)
 
 
-        
 def DictionariesAPI():
-    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
-    Computing={'INT32':0,'INT64':1,'FP32':2,'FP64':3}
-    Test={True:1,False:0}
-    return(Marsaglia,Computing,Test)
-    
+    Marsaglia = {"CONG": 0, "SHR3": 1, "MWC": 2, "KISS": 3}
+    Computing = {"INT32": 0, "INT64": 1, "FP32": 2, "FP64": 3}
+    Test = {True: 1, False: 0}
+    return (Marsaglia, Computing, Test)
+
+
 # find prime factors of a number
 # Get for WWW :
 # http://pythonism.wordpress.com/2008/05/17/looking-at-factorisation-in-python/
 def PrimeFactors(x):
 
-    factorlist=numpy.array([]).astype('uint32')
-    loop=2
-    while loop<=x:
-        if x%loop==0:
-            x/=loop
-            factorlist=numpy.append(factorlist,[loop])
+    factorlist = numpy.array([]).astype("uint32")
+    loop = 2
+    while loop <= x:
+        if x % loop == 0:
+            x /= loop
+            factorlist = numpy.append(factorlist, [loop])
         else:
-            loop+=1
+            loop += 1
     return factorlist
 
+
 # Try to find the best thread number in Hybrid approach (Blocks&Threads)
 # output is thread number
 def BestThreadsNumber(jobs):
-    factors=PrimeFactors(jobs)
-    matrix=numpy.append([factors],[factors[::-1]],axis=0)
-    threads=1
+    factors = PrimeFactors(jobs)
+    matrix = numpy.append([factors], [factors[::-1]], axis=0)
+    threads = 1
     for factor in matrix.transpose().ravel():
-        threads=threads*factor
-        if threads*threads>jobs or threads>512:
+        threads = threads * factor
+        if threads * threads > jobs or threads > 512:
             break
-    return(long(threads))
+    return int(threads)
+
 
-# Predicted Amdahl Law (Reduced with s=1-p)  
+# Predicted Amdahl Law (Reduced with s=1-p)
 def AmdahlR(N, T1, p):
-    return (T1*(1-p+p/N))
+    return T1 * (1 - p + p / N)
+
 
 # Predicted Amdahl Law
 def Amdahl(N, T1, s, p):
-    return (T1*(s+p/N))
+    return T1 * (s + p / N)
+
 
 # Predicted Mylq Law with first order
-def Mylq(N, T1,s,c,p):
-    return (T1*(s+p/N)+c*N)
+def Mylq(N, T1, s, c, p):
+    return T1 * (s + p / N) + c * N
+
 
 # Predicted Mylq Law with second order
-def Mylq2(N, T1,s,c1,c2,p):
-    return (T1*(s+p/N)+c1*N+c2*N*N)
+def Mylq2(N, T1, s, c1, c2, p):
+    return T1 * (s + p / N) + c1 * N + c2 * N * N
+
 
 def KernelCodeCuda():
-    KERNEL_CODE_CUDA="""
+    KERNEL_CODE_CUDA = """
 #define TCONG 0
 #define TSHR3 1
 #define TMWC 2
@@ -272,10 +284,11 @@ __global__ void MainLoopHybrid(ulong *s,ulong iterations,uint seed_w,uint seed_z
 }
 
 """
-    return(KERNEL_CODE_CUDA)
+    return KERNEL_CODE_CUDA
+
 
 def KernelCodeOpenCL():
-    KERNEL_CODE_OPENCL="""
+    KERNEL_CODE_OPENCL = """
 #define TCONG 0
 #define TSHR3 1
 #define TMWC 2
@@ -395,25 +408,28 @@ ulong MainLoop(ulong iterations,uint seed_z,uint seed_w,size_t work)
       total+=inside;
 #endif
    }
-   
+
    return(total);
 }
 
-__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+__kernel void MainLoopGlobal(
+    __global ulong *s,ulong iterations,uint seed_w,uint seed_z)
 {
    ulong total=MainLoop(iterations,seed_z,seed_w,get_global_id(0));
    barrier(CLK_GLOBAL_MEM_FENCE);
-   s[get_global_id(0)]=total;     
+   s[get_global_id(0)]=total;
 }
 
-__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+__kernel void MainLoopLocal(
+    __global ulong *s,ulong iterations,uint seed_w,uint seed_z)
 {
    ulong total=MainLoop(iterations,seed_z,seed_w,get_local_id(0));
    barrier(CLK_LOCAL_MEM_FENCE);
    s[get_local_id(0)]=total;
 }
 
-__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+__kernel void MainLoopHybrid(
+    __global ulong *s,ulong iterations,uint seed_w,uint seed_z)
 {
    ulong total=MainLoop(iterations,seed_z,seed_w,get_global_id(0));
    barrier(CLK_GLOBAL_MEM_FENCE || CLK_LOCAL_MEM_FENCE);
@@ -421,184 +437,221 @@ __kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint
 }
 
 """
-    return(KERNEL_CODE_OPENCL)
-    
+    return KERNEL_CODE_OPENCL
+
+
 def MetropolisCuda(InputCU):
 
-    print("Inside ",InputCU)
-    
-    iterations=InputCU['Iterations']
-    steps=InputCU['Steps']
-    blocks=InputCU['Blocks']
-    threads=InputCU['Threads']
-    Device=InputCU['Device']
-    RNG=InputCU['RNG']
-    ValueType=InputCU['ValueType']
-    TestType=InputCU['IfThen']
-    Seeds=InputCU['Seeds']
-    
-    Marsaglia,Computing,Test=DictionariesAPI()
-    
+    print("Inside ", InputCU)
+
+    iterations = InputCU["Iterations"]
+    steps = InputCU["Steps"]
+    blocks = InputCU["Blocks"]
+    threads = InputCU["Threads"]
+    Device = InputCU["Device"]
+    RNG = InputCU["RNG"]
+    ValueType = InputCU["ValueType"]
+    Seeds = InputCU["Seeds"]
+
+    Marsaglia, Computing, Test = DictionariesAPI()
+
     try:
         # For PyCUDA import
         import pycuda.driver as cuda
         from pycuda.compiler import SourceModule
-        
+
         cuda.init()
         for Id in range(cuda.Device.count()):
-            if Id==Device:
-                XPU=cuda.Device(Id)
+            if Id == Device:
+                XPU = cuda.Device(Id)
                 print("GPU selected %s" % XPU.name())
         print
 
     except ImportError:
         print("Platform does not seem to support CUDA")
 
-    circle=numpy.zeros(blocks*threads).astype(numpy.uint64)  
+    circle = numpy.zeros(blocks * threads).astype(numpy.uint64)
     circleCU = cuda.InOut(circle)
-    #circleCU = cuda.mem_alloc(circle.size*circle.dtype.itemize)
-    #cuda.memcpy_htod(circleCU, circle)
+    # circleCU = cuda.mem_alloc(circle.size*circle.dtype.itemize)
+    # cuda.memcpy_htod(circleCU, circle)
 
-    Context=XPU.make_context()
+    Context = XPU.make_context()
 
     try:
-        mod = SourceModule(KernelCodeCuda(),options=['--compiler-options','-DTRNG=%i -DTYPE=%s' % (Marsaglia[RNG],Computing[ValueType])])
-        #mod = SourceModule(KernelCodeCuda(),nvcc='nvcc',keep=True)
+        mod = SourceModule(
+            KernelCodeCuda(),
+            options=[
+                "--compiler-options",
+                "-DTRNG=%i -DTYPE=%s" % (Marsaglia[RNG], Computing[ValueType]),
+            ],
+        )
+        # mod = SourceModule(KernelCodeCuda(),nvcc='nvcc',keep=True)
         # Needed to set the compiler via ccbin for CUDA9 implementation
-        #mod = SourceModule(KernelCodeCuda(),options=['-ccbin','clang-3.9','--compiler-options','-DTRNG=%i' % Marsaglia[RNG],'-DTYPE=%s' % Computing[ValueType],'-DTEST=%s' % Test[TestType]],keep=True)
-    except:
+        # mod = SourceModule(KernelCodeCuda(),options=['-ccbin','clang-3.9','--compiler-options','-DTRNG=%i' % Marsaglia[RNG],'-DTYPE=%s' % Computing[ValueType],'-DTEST=%s' % Test[TestType]],keep=True)  # noqa: E501
+    except Exception:
         print("Compilation seems to break")
- 
-    MetropolisBlocksCU=mod.get_function("MainLoopBlocks")
-    MetropolisThreadsCU=mod.get_function("MainLoopThreads")
-    MetropolisHybridCU=mod.get_function("MainLoopHybrid")
 
-    MyDuration=numpy.zeros(steps)
+    MetropolisBlocksCU = mod.get_function("MainLoopBlocks")  # noqa: F841
+    MetropolisThreadsCU = mod.get_function("MainLoopThreads")  # noqa: F841
+    MetropolisHybridCU = mod.get_function("MainLoopHybrid")
+
+    MyDuration = numpy.zeros(steps)
 
-    jobs=blocks*threads;
+    jobs = blocks * threads
 
-    iterationsCU=numpy.uint64(iterations/jobs)
-    if iterations%jobs!=0:
-        iterationsCU+=numpy.uint64(1)
+    iterationsCU = numpy.uint64(iterations / jobs)
+    if iterations % jobs != 0:
+        iterationsCU += numpy.uint64(1)
 
     for i in range(steps):
-        start_time=time.time()
+        start_time = time.time()
 
         try:
-            MetropolisHybridCU(circleCU,
-                               numpy.uint64(iterationsCU),
-                               numpy.uint32(Seeds[0]),
-                               numpy.uint32(Seeds[1]),
-                               grid=(blocks,1),block=(threads,1,1))
-        except:
+            MetropolisHybridCU(
+                circleCU,
+                numpy.uint64(iterationsCU),
+                numpy.uint32(Seeds[0]),
+                numpy.uint32(Seeds[1]),
+                grid=(blocks, 1),
+                block=(threads, 1, 1),
+            )
+        except Exception:
             print("Crash during CUDA call")
 
-        elapsed = time.time()-start_time
-        print("(Blocks/Threads)=(%i,%i) method done in %.2f s..." % (blocks,threads,elapsed))
+        elapsed = time.time() - start_time
+        print(
+            "(Blocks/Threads)=(%i,%i) method done in %.2f s..."
+            % (blocks, threads, elapsed)
+        )
 
-        MyDuration[i]=elapsed
+        MyDuration[i] = elapsed
 
-    OutputCU={'Inside':sum(circle),'NewIterations':numpy.uint64(iterationsCU*jobs),'Duration':MyDuration}
+    OutputCU = {
+        "Inside": sum(circle),
+        "NewIterations": numpy.uint64(iterationsCU * jobs),
+        "Duration": MyDuration,
+    }
     print(OutputCU)
     Context.pop()
-    
+
     Context.detach()
 
-    return(OutputCU)
+    return OutputCU
+
 
 def MetropolisOpenCL(InputCL):
 
     import pyopencl as cl
 
-    iterations=InputCL['Iterations']
-    steps=InputCL['Steps']
-    blocks=InputCL['Blocks']
-    threads=InputCL['Threads']
-    Device=InputCL['Device']
-    RNG=InputCL['RNG']  
-    ValueType=InputCL['ValueType']
-    TestType=InputCL['IfThen']
-    Seeds=InputCL['Seeds']
+    iterations = InputCL["Iterations"]
+    steps = InputCL["Steps"]
+    blocks = InputCL["Blocks"]
+    threads = InputCL["Threads"]
+    Device = InputCL["Device"]
+    RNG = InputCL["RNG"]
+    ValueType = InputCL["ValueType"]
+    TestType = InputCL["IfThen"]
+    Seeds = InputCL["Seeds"]
 
-    Marsaglia,Computing,Test=DictionariesAPI()
+    Marsaglia, Computing, Test = DictionariesAPI()
 
     # Initialisation des variables en les CASTant correctement
-    Id=0
-    HasXPU=False
+    Id = 0
+    HasXPU = False
     for platform in cl.get_platforms():
         for device in platform.get_devices():
-            if Id==Device:
-                XPU=device
-                print("CPU/GPU selected: ",device.name.lstrip())
-                HasXPU=True
-            Id+=1
+            if Id == Device:
+                XPU = device
+                print("CPU/GPU selected: ", device.name.lstrip())
+                HasXPU = True
+            Id += 1
             # print(Id)
 
-    if HasXPU==False:
-        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
-        sys.exit()           
+    if not HasXPU:
+        print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
+        sys.exit()
 
     # Je cree le contexte et la queue pour son execution
     try:
         ctx = cl.Context(devices=[XPU])
-        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
-    except:
+        queue = cl.CommandQueue(
+            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
+        )
+    except Exception:
         print("Crash during context creation")
 
     # Je recupere les flag possibles pour les buffers
     mf = cl.mem_flags
-	
-    circle=numpy.zeros(blocks*threads).astype(numpy.uint64)
-    circleCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=circle)
-    
-    MetropolisCL = cl.Program(ctx,KernelCodeOpenCL()).build( options = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%s -DTEST=%s" % (Marsaglia[RNG],Computing[ValueType],Test[TestType]))
 
-    MyDuration=numpy.zeros(steps)
-  
-    jobs=blocks*threads;
+    circle = numpy.zeros(blocks * threads).astype(numpy.uint64)
+    circleCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=circle)
+
+    MetropolisCL = cl.Program(ctx, KernelCodeOpenCL()).build(
+        options="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%s -DTEST=%s"
+        % (Marsaglia[RNG], Computing[ValueType], Test[TestType])
+    )
+
+    MyDuration = numpy.zeros(steps)
 
-    iterationsCL=numpy.uint64(iterations/jobs)
-    if iterations%jobs!=0:
-        iterationsCL+=1
+    jobs = blocks * threads
+
+    iterationsCL = numpy.uint64(iterations / jobs)
+    if iterations % jobs != 0:
+        iterationsCL += 1
 
     for i in range(steps):
-        start_time=time.time()
+        start_time = time.time()
         if threads == 1:
-            CLLaunch=MetropolisCL.MainLoopGlobal(queue,(blocks,),None,
-                                                 circleCL,
-                                                 numpy.uint64(iterationsCL),
-                                                 numpy.uint32(Seeds[0]),
-                                                 numpy.uint32(Seeds[1]))
+            CLLaunch = MetropolisCL.MainLoopGlobal(
+                queue,
+                (blocks,),
+                None,
+                circleCL,
+                numpy.uint64(iterationsCL),
+                numpy.uint32(Seeds[0]),
+                numpy.uint32(Seeds[1]),
+            )
         else:
-            CLLaunch=MetropolisCL.MainLoopHybrid(queue,(jobs,),(threads,),
-                                                 circleCL,
-                                                 numpy.uint64(iterationsCL),
-                                                 numpy.uint32(Seeds[0]),
-                                                 numpy.uint32(Seeds[1]))
+            CLLaunch = MetropolisCL.MainLoopHybrid(
+                queue,
+                (jobs,),
+                (threads,),
+                circleCL,
+                numpy.uint64(iterationsCL),
+                numpy.uint32(Seeds[0]),
+                numpy.uint32(Seeds[1]),
+            )
 
         CLLaunch.wait()
         cl.enqueue_copy(queue, circle, circleCL).wait()
 
-        elapsed = time.time()-start_time
-        print("(Blocks/Threads)=(%i,%i) method done in %.2f s..." % (blocks,threads,elapsed))
-            
+        elapsed = time.time() - start_time
+        print(
+            "(Blocks/Threads)=(%i,%i) method done in %.2f s..."
+            % (blocks, threads, elapsed)
+        )
+
         # Elapsed method based on CLLaunch doesn't work for Beignet OpenCL
         # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
 
         # print circle,numpy.mean(circle),numpy.median(circle),numpy.std(circle)
-        MyDuration[i]=elapsed
+        MyDuration[i] = elapsed
         # AllPi=4./numpy.float32(iterationsCL)*circle.astype(numpy.float32)
         # MyPi[i]=numpy.median(AllPi)
         # print MyPi[i],numpy.std(AllPi),MyDuration[i]
 
     circleCL.release()
 
-    OutputCL={'Inside':sum(circle),'NewIterations':numpy.uint64(iterationsCL*jobs),'Duration':MyDuration}
+    OutputCL = {
+        "Inside": sum(circle),
+        "NewIterations": numpy.uint64(iterationsCL * jobs),
+        "Duration": MyDuration,
+    }
     # print(OutputCL)
-    return(OutputCL)
+    return OutputCL
 
 
-def FitAndPrint(N,D,Curves):
+def FitAndPrint(N, D, Curves):
 
     from scipy.optimize import curve_fit
     import matplotlib.pyplot as plt
@@ -606,158 +659,201 @@ def FitAndPrint(N,D,Curves):
     try:
         coeffs_Amdahl, matcov_Amdahl = curve_fit(Amdahl, N, D)
 
-        D_Amdahl=Amdahl(N,coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2])
-        coeffs_Amdahl[1]=coeffs_Amdahl[1]*coeffs_Amdahl[0]/D[0]
-        coeffs_Amdahl[2]=coeffs_Amdahl[2]*coeffs_Amdahl[0]/D[0]
-        coeffs_Amdahl[0]=D[0]
-        print("Amdahl Normalized: T=%.2f(%.6f+%.6f/N)" % (coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2]))
-    except:
+        D_Amdahl = Amdahl(N, coeffs_Amdahl[0], coeffs_Amdahl[1], coeffs_Amdahl[2])
+        coeffs_Amdahl[1] = coeffs_Amdahl[1] * coeffs_Amdahl[0] / D[0]
+        coeffs_Amdahl[2] = coeffs_Amdahl[2] * coeffs_Amdahl[0] / D[0]
+        coeffs_Amdahl[0] = D[0]
+        print(
+            "Amdahl Normalized: T=%.2f(%.6f+%.6f/N)"
+            % (coeffs_Amdahl[0], coeffs_Amdahl[1], coeffs_Amdahl[2])
+        )
+    except Exception:
         print("Impossible to fit for Amdahl law : only %i elements" % len(D))
 
     try:
         coeffs_AmdahlR, matcov_AmdahlR = curve_fit(AmdahlR, N, D)
 
-        D_AmdahlR=AmdahlR(N,coeffs_AmdahlR[0],coeffs_AmdahlR[1])
-        coeffs_AmdahlR[1]=coeffs_AmdahlR[1]*coeffs_AmdahlR[0]/D[0]
-        coeffs_AmdahlR[0]=D[0]
-        print("Amdahl Reduced Normalized: T=%.2f(%.6f+%.6f/N)" % (coeffs_AmdahlR[0],1-coeffs_AmdahlR[1],coeffs_AmdahlR[1]))
+        # D_AmdahlR = AmdahlR(N, coeffs_AmdahlR[0], coeffs_AmdahlR[1])
+        coeffs_AmdahlR[1] = coeffs_AmdahlR[1] * coeffs_AmdahlR[0] / D[0]
+        coeffs_AmdahlR[0] = D[0]
+        print(
+            "Amdahl Reduced Normalized: T=%.2f(%.6f+%.6f/N)"
+            % (coeffs_AmdahlR[0], 1 - coeffs_AmdahlR[1], coeffs_AmdahlR[1])
+        )
 
-    except:
+    except Exception:
         print("Impossible to fit for Reduced Amdahl law : only %i elements" % len(D))
 
     try:
         coeffs_Mylq, matcov_Mylq = curve_fit(Mylq, N, D)
 
-        coeffs_Mylq[1]=coeffs_Mylq[1]*coeffs_Mylq[0]/D[0]
+        coeffs_Mylq[1] = coeffs_Mylq[1] * coeffs_Mylq[0] / D[0]
         # coeffs_Mylq[2]=coeffs_Mylq[2]*coeffs_Mylq[0]/D[0]
-        coeffs_Mylq[3]=coeffs_Mylq[3]*coeffs_Mylq[0]/D[0]
-        coeffs_Mylq[0]=D[0]
-        print("Mylq Normalized : T=%.2f(%.6f+%.6f/N)+%.6f*N" % (coeffs_Mylq[0],
-                                                                coeffs_Mylq[1],
-                                                                coeffs_Mylq[3],
-                                                                coeffs_Mylq[2]))
-        D_Mylq=Mylq(N,coeffs_Mylq[0],coeffs_Mylq[1],coeffs_Mylq[2],
-                    coeffs_Mylq[3])
-    except:
+        coeffs_Mylq[3] = coeffs_Mylq[3] * coeffs_Mylq[0] / D[0]
+        coeffs_Mylq[0] = D[0]
+        print(
+            "Mylq Normalized : T=%.2f(%.6f+%.6f/N)+%.6f*N"
+            % (coeffs_Mylq[0], coeffs_Mylq[1], coeffs_Mylq[3], coeffs_Mylq[2])
+        )
+        D_Mylq = Mylq(N, coeffs_Mylq[0], coeffs_Mylq[1], coeffs_Mylq[2],
+                coeffs_Mylq[3])
+    except Exception:
         print("Impossible to fit for Mylq law : only %i elements" % len(D))
 
     try:
         coeffs_Mylq2, matcov_Mylq2 = curve_fit(Mylq2, N, D)
 
-        coeffs_Mylq2[1]=coeffs_Mylq2[1]*coeffs_Mylq2[0]/D[0]
+        coeffs_Mylq2[1] = coeffs_Mylq2[1] * coeffs_Mylq2[0] / D[0]
         # coeffs_Mylq2[2]=coeffs_Mylq2[2]*coeffs_Mylq2[0]/D[0]
         # coeffs_Mylq2[3]=coeffs_Mylq2[3]*coeffs_Mylq2[0]/D[0]
-        coeffs_Mylq2[4]=coeffs_Mylq2[4]*coeffs_Mylq2[0]/D[0]
-        coeffs_Mylq2[0]=D[0]
-        print("Mylq 2nd order Normalized: T=%.2f(%.6f+%.6f/N)+%.6f*N+%.6f*N^2" % (coeffs_Mylq2[0],coeffs_Mylq2[1],coeffs_Mylq2[4],coeffs_Mylq2[2],coeffs_Mylq2[3]))
-
-    except:
+        coeffs_Mylq2[4] = coeffs_Mylq2[4] * coeffs_Mylq2[0] / D[0]
+        coeffs_Mylq2[0] = D[0]
+        print(
+            "Mylq 2nd order Normalized: T=%.2f(%.6f+%.6f/N)+%.6f*N+%.6f*N^2"
+            % (
+                coeffs_Mylq2[0],
+                coeffs_Mylq2[1],
+                coeffs_Mylq2[4],
+                coeffs_Mylq2[2],
+                coeffs_Mylq2[3],
+            )
+        )
+
+    except Exception:
         print("Impossible to fit for 2nd order Mylq law : only %i elements" % len(D))
 
     if Curves:
         plt.xlabel("Number of Threads/work Items")
         plt.ylabel("Total Elapsed Time")
 
-        Experience,=plt.plot(N,D,'ro') 
+        (Experience,) = plt.plot(N, D, "ro")
     try:
-        pAmdahl,=plt.plot(N,D_Amdahl,label="Loi de Amdahl")    
-        pMylq,=plt.plot(N,D_Mylq,label="Loi de Mylq")
-    except:
+        (pAmdahl,) = plt.plot(N, D_Amdahl, label="Loi de Amdahl")
+        (pMylq,) = plt.plot(N, D_Mylq, label="Loi de Mylq")
+    except Exception:
         print("Fit curves seem not to be available")
 
     plt.legend()
     plt.show()
 
-if __name__=='__main__':
-    
+
+if __name__ == "__main__":
+
     # Set defaults values
-  
+
     # GPU style can be Cuda (Nvidia implementation) or OpenCL
-    GpuStyle='OpenCL'
+    GpuStyle = "OpenCL"
     # Iterations is integer
-    Iterations=1000000000
+    Iterations = 1000000000
     # BlocksBlocks in first number of Blocks to explore
-    BlocksBegin=1024
+    BlocksBegin = 1024
     # BlocksEnd is last number of Blocks to explore
-    BlocksEnd=1024
+    BlocksEnd = 1024
     # BlocksStep is the step of Blocks to explore
-    BlocksStep=1
+    BlocksStep = 1
     # ThreadsBlocks in first number of Blocks to explore
-    ThreadsBegin=1
+    ThreadsBegin = 1
     # ThreadsEnd is last number of Blocks to explore
-    ThreadsEnd=1
+    ThreadsEnd = 1
     # ThreadsStep is the step of Blocks to explore
-    ThreadsStep=1
+    ThreadsStep = 1
     # Redo is the times to redo the test to improve metrology
-    Redo=1
+    Redo = 1
     # OutMetrology is method for duration estimation : False is GPU inside
-    OutMetrology=False
-    Metrology='InMetro'
+    OutMetrology = False
+    Metrology = "InMetro"
     # Curves is True to print the curves
-    Curves=False
+    Curves = False
     # Fit is True to print the curves
-    Fit=False
+    Fit = False
     # Inside based on If
-    IfThen=False
+    IfThen = False
     # Marsaglia RNG
-    RNG='MWC'
+    RNG = "MWC"
     # Value type : INT32, INT64, FP32, FP64
-    ValueType='FP32'
+    ValueType = "FP32"
     # Seeds for RNG
-    Seeds=110271,101008
+    Seeds = 110271, 101008
+
+    HowToUse = "%s -o (Out of Core Metrology) -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadssTep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>"  # noqa: E501
 
-    HowToUse='%s -o (Out of Core Metrology) -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadssTep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'
-    
     try:
-        opts, args = getopt.getopt(sys.argv[1:],"hockg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksBegin=","blocksEnd=","blocksStep=","threadsFirst=","threadsLast=","threadssTep=","redo=","device=","marsaglia=","valuetype="])
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "hockg:i:b:e:s:f:l:t:r:d:m:v:",
+            [
+                "gpustyle=",
+                "iterations=",
+                "blocksBegin=",
+                "blocksEnd=",
+                "blocksStep=",
+                "threadsFirst=",
+                "threadsLast=",
+                "threadssTep=",
+                "redo=",
+                "device=",
+                "marsaglia=",
+                "valuetype=",
+            ],
+        )
     except getopt.GetoptError:
         print(HowToUse % sys.argv[0])
         sys.exit(2)
 
     # List of Devices
-    Devices=[]
-    Alu={}
-        
+    Devices = []
+    Alu = {}
+
     for opt, arg in opts:
-        if opt == '-h':
+        if opt == "-h":
             print(HowToUse % sys.argv[0])
 
             print("\nInformations about devices detected under OpenCL API:")
             # For PyOpenCL import
             try:
                 import pyopencl as cl
-                Id=0
+
+                Id = 0
                 for platform in cl.get_platforms():
                     for device in platform.get_devices():
-                        #deviceType=cl.device_type.to_string(device.type)
-                        deviceType="xPU"
-                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
-                        Id=Id+1
-
-            except:
+                        # deviceType=cl.device_type.to_string(device.type)
+                        deviceType = "xPU"
+                        print(
+                            "Device #%i from %s of type %s : %s"
+                            % (
+                                Id,
+                                platform.vendor.lstrip(),
+                                deviceType,
+                                device.name.lstrip(),
+                            )
+                        )
+                        Id = Id + 1
+
+            except Exception:
                 print("Your platform does not seem to support OpenCL")
 
             print("\nInformations about devices detected under CUDA API:")
             # For PyCUDA import
             try:
                 import pycuda.driver as cuda
+
                 cuda.init()
                 for Id in range(cuda.Device.count()):
-                    device=cuda.Device(Id)
-                    print("Device #%i of type GPU : %s" % (Id,device.name()))
+                    device = cuda.Device(Id)
+                    print("Device #%i of type GPU : %s" % (Id, device.name()))
                 print
-            except:
+            except Exception:
                 print("Your platform does not seem to support CUDA")
-        
+
             sys.exit()
-        
-        elif opt == '-o':
-            OutMetrology=True
-            Metrology='OutMetro'
-        elif opt == '-c':
-            Curves=True
-        elif opt == '-k':
-            IfThen=True
+
+        elif opt == "-o":
+            OutMetrology = True
+            Metrology = "OutMetro"
+        elif opt == "-c":
+            Curves = True
+        elif opt == "-k":
+            IfThen = True
         elif opt in ("-d", "--device"):
             Devices.append(int(arg))
         elif opt in ("-g", "--gpustyle"):
@@ -770,7 +866,7 @@ if __name__=='__main__':
             Iterations = numpy.uint64(arg)
         elif opt in ("-b", "--blocksbegin"):
             BlocksBegin = int(arg)
-            BlocksEnd= BlocksBegin
+            BlocksEnd = BlocksBegin
         elif opt in ("-e", "--blocksend"):
             BlocksEnd = int(arg)
         elif opt in ("-s", "--blocksstep"):
@@ -786,9 +882,9 @@ if __name__=='__main__':
             Redo = int(arg)
 
     # If no device has been specified, take the first one!
-    if len(Devices)==0:
+    if len(Devices) == 0:
         Devices.append(0)
-        
+
     print("Devices Identification : %s" % Devices)
     print("GpuStyle used : %s" % GpuStyle)
     print("Iterations : %s" % Iterations)
@@ -803,168 +899,267 @@ if __name__=='__main__':
     print("Type of Marsaglia RNG used : %s" % RNG)
     print("Type of variable : %s" % ValueType)
 
-    if GpuStyle=='CUDA':
+    if GpuStyle == "CUDA":
         try:
             # For PyCUDA import
             import pycuda.driver as cuda
-            
+
             cuda.init()
             for Id in range(cuda.Device.count()):
-                device=cuda.Device(Id)
-                print("Device #%i of type GPU : %s" % (Id,device.name()))
+                device = cuda.Device(Id)
+                print("Device #%i of type GPU : %s" % (Id, device.name()))
                 if Id in Devices:
-                    Alu[Id]='GPU'
-            
+                    Alu[Id] = "GPU"
+
         except ImportError:
             print("Platform does not seem to support CUDA")
 
-    if GpuStyle=='OpenCL':
+    if GpuStyle == "OpenCL":
         try:
             # For PyOpenCL import
             import pyopencl as cl
-            Id=0
+
+            Id = 0
             for platform in cl.get_platforms():
                 for device in platform.get_devices():
-                    #deviceType=cl.device_type.to_string(device.type)
-                    deviceType="xPU"
-                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))
+                    # deviceType=cl.device_type.to_string(device.type)
+                    deviceType = "xPU"
+                    print(
+                        "Device #%i from %s of type %s : %s"
+                        % (
+                            Id,
+                            platform.vendor.lstrip().rstrip(),
+                            deviceType,
+                            device.name.lstrip().rstrip(),
+                        )
+                    )
 
                     if Id in Devices:
-                    # Set the Alu as detected Device Type
-                        Alu[Id]=deviceType
-                    Id=Id+1
+                        # Set the Alu as detected Device Type
+                        Alu[Id] = deviceType
+                    Id = Id + 1
         except ImportError:
             print("Platform does not seem to support OpenCL")
 
-    #print(Devices,Alu)
-            
-    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
-    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)
-    
-    ExploredJobs=numpy.array([]).astype(numpy.uint32)
-    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
-    ExploredThreads=numpy.array([]).astype(numpy.uint32)
-    avgD=numpy.array([]).astype(numpy.float32)
-    medD=numpy.array([]).astype(numpy.float32)
-    stdD=numpy.array([]).astype(numpy.float32)
-    minD=numpy.array([]).astype(numpy.float32)
-    maxD=numpy.array([]).astype(numpy.float32)
-    avgR=numpy.array([]).astype(numpy.float32)
-    medR=numpy.array([]).astype(numpy.float32)
-    stdR=numpy.array([]).astype(numpy.float32)
-    minR=numpy.array([]).astype(numpy.float32)
-    maxR=numpy.array([]).astype(numpy.float32)
-
-    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):
-        
+    # print(Devices,Alu)
+
+    BlocksList = range(BlocksBegin, BlocksEnd + BlocksStep, BlocksStep)
+    ThreadsList = range(ThreadsBegin, ThreadsEnd + ThreadsStep, ThreadsStep)
+
+    ExploredJobs = numpy.array([]).astype(numpy.uint32)
+    ExploredBlocks = numpy.array([]).astype(numpy.uint32)
+    ExploredThreads = numpy.array([]).astype(numpy.uint32)
+    avgD = numpy.array([]).astype(numpy.float32)
+    medD = numpy.array([]).astype(numpy.float32)
+    stdD = numpy.array([]).astype(numpy.float32)
+    minD = numpy.array([]).astype(numpy.float32)
+    maxD = numpy.array([]).astype(numpy.float32)
+    avgR = numpy.array([]).astype(numpy.float32)
+    medR = numpy.array([]).astype(numpy.float32)
+    stdR = numpy.array([]).astype(numpy.float32)
+    minR = numpy.array([]).astype(numpy.float32)
+    maxR = numpy.array([]).astype(numpy.float32)
+
+    for Blocks, Threads in itertools.product(BlocksList, ThreadsList):
+
         # print Blocks,Threads
-        circle=numpy.zeros(Blocks*Threads).astype(numpy.uint64)
-        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
-        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
-        ExploredThreads=numpy.append(ExploredThreads,Threads)
-        
-        if OutMetrology: 
-            DurationItem=numpy.array([]).astype(numpy.float32)
-            Duration=numpy.array([]).astype(numpy.float32)
-            Rate=numpy.array([]).astype(numpy.float32)
+        circle = numpy.zeros(Blocks * Threads).astype(numpy.uint64)
+        ExploredJobs = numpy.append(ExploredJobs, Blocks * Threads)
+        ExploredBlocks = numpy.append(ExploredBlocks, Blocks)
+        ExploredThreads = numpy.append(ExploredThreads, Threads)
+
+        if OutMetrology:
+            DurationItem = numpy.array([]).astype(numpy.float32)
+            Duration = numpy.array([]).astype(numpy.float32)
+            Rate = numpy.array([]).astype(numpy.float32)
             for i in range(Redo):
-                start=time.time()
-                if GpuStyle=='CUDA':
+                start = time.time()
+                if GpuStyle == "CUDA":
                     try:
-                        InputCU={}
-                        InputCU['Iterations']=Iterations
-                        InputCU['Steps']=1
-                        InputCU['Blocks']=Blocks
-                        InputCU['Threads']=Threads
-                        InputCU['Device']=Devices[0]
-                        InputCU['RNG']=RNG
-                        InputCU['Seeds']=Seeds
-                        InputCU['ValueType']=ValueType                    
-                        InputCU['IfThen']=IfThen                    
-                        OutputCU=MetropolisCuda(InputCU)
-                        Inside=OutputCU['Circle']
-                        NewIterations=OutputCU['NewIterations']
-                        Duration=OutputCU['Duration']
-                    except:
-                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
-                elif GpuStyle=='OpenCL':
+                        InputCU = {}
+                        InputCU["Iterations"] = Iterations
+                        InputCU["Steps"] = 1
+                        InputCU["Blocks"] = Blocks
+                        InputCU["Threads"] = Threads
+                        InputCU["Device"] = Devices[0]
+                        InputCU["RNG"] = RNG
+                        InputCU["Seeds"] = Seeds
+                        InputCU["ValueType"] = ValueType
+                        InputCU["IfThen"] = IfThen
+                        OutputCU = MetropolisCuda(InputCU)
+                        Inside = OutputCU["Circle"]
+                        NewIterations = OutputCU["NewIterations"]
+                        Duration = OutputCU["Duration"]
+                    except Exception:
+                        print(
+                            "Problem with (%i,%i) // computations on Cuda"
+                            % (Blocks, Threads)
+                        )
+                elif GpuStyle == "OpenCL":
                     try:
-                        InputCL={}
-                        InputCL['Iterations']=Iterations
-                        InputCL['Steps']=1
-                        InputCL['Blocks']=Blocks
-                        InputCL['Threads']=Threads
-                        InputCL['Device']=Devices[0]
-                        InputCL['RNG']=RNG
-                        InputCL['Seeds']=Seeds
-                        InputCL['ValueType']=ValueType
-                        InputCL['IfThen']=IfThen                    
-                        OutputCL=MetropolisOpenCL(InputCL)
-                        Inside=OutputCL['Circle']
-                        NewIterations=OutputCL['NewIterations']
-                        Duration=OutputCL['Duration']
-                    except:
-                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))
-                Duration=numpy.append(Duration,time.time()-start)
-                Rate=numpy.append(Rate,NewIterations/Duration[-1])
+                        InputCL = {}
+                        InputCL["Iterations"] = Iterations
+                        InputCL["Steps"] = 1
+                        InputCL["Blocks"] = Blocks
+                        InputCL["Threads"] = Threads
+                        InputCL["Device"] = Devices[0]
+                        InputCL["RNG"] = RNG
+                        InputCL["Seeds"] = Seeds
+                        InputCL["ValueType"] = ValueType
+                        InputCL["IfThen"] = IfThen
+                        OutputCL = MetropolisOpenCL(InputCL)
+                        Inside = OutputCL["Circle"]
+                        NewIterations = OutputCL["NewIterations"]
+                        Duration = OutputCL["Duration"]
+                    except Exception:
+                        print(
+                            "Problem with (%i,%i) // computations on OpenCL"
+                            % (Blocks, Threads)
+                        )
+                Duration = numpy.append(Duration, time.time() - start)
+                Rate = numpy.append(Rate, NewIterations / Duration[-1])
         else:
-            if GpuStyle=='CUDA':
+            if GpuStyle == "CUDA":
                 try:
-                    InputCU={}
-                    InputCU['Iterations']=Iterations
-                    InputCU['Steps']=Redo
-                    InputCU['Blocks']=Blocks
-                    InputCU['Threads']=Threads
-                    InputCU['Device']=Devices[0]
-                    InputCU['RNG']=RNG
-                    InputCU['Seeds']=Seeds
-                    InputCU['ValueType']=ValueType
-                    InputCU['IfThen']=IfThen                    
-                    OutputCU=MetropolisCuda(InputCU)
-                    Inside=OutputCU['Inside']
-                    NewIterations=OutputCU['NewIterations']
-                    Duration=OutputCU['Duration']
-                    pycuda.context.pop()
-                except:
-                    print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
-            elif GpuStyle=='OpenCL':
+                    InputCU = {}
+                    InputCU["Iterations"] = Iterations
+                    InputCU["Steps"] = Redo
+                    InputCU["Blocks"] = Blocks
+                    InputCU["Threads"] = Threads
+                    InputCU["Device"] = Devices[0]
+                    InputCU["RNG"] = RNG
+                    InputCU["Seeds"] = Seeds
+                    InputCU["ValueType"] = ValueType
+                    InputCU["IfThen"] = IfThen
+                    OutputCU = MetropolisCuda(InputCU)
+                    Inside = OutputCU["Inside"]
+                    NewIterations = OutputCU["NewIterations"]
+                    Duration = OutputCU["Duration"]
+                    pycuda.context.pop()  # noqa: F821
+                except Exception:
+                    print(
+                        "Problem with (%i,%i) // computations on Cuda"
+                        % (Blocks, Threads)
+                    )
+            elif GpuStyle == "OpenCL":
                 try:
-                    InputCL={}
-                    InputCL['Iterations']=Iterations
-                    InputCL['Steps']=Redo
-                    InputCL['Blocks']=Blocks
-                    InputCL['Threads']=Threads
-                    InputCL['Device']=Devices[0]
-                    InputCL['RNG']=RNG
-                    InputCL['Seeds']=Seeds
-                    InputCL['ValueType']=ValueType
-                    InputCL['IfThen']=IfThen
-                    OutputCL=MetropolisOpenCL(InputCL)
-                    Inside=OutputCL['Inside']
-                    NewIterations=OutputCL['NewIterations']
-                    Duration=OutputCL['Duration']
-                except:
-                    print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))
-            Rate=NewIterations/Duration[-1]
-            print("Itops %i\nLogItops %.2f " % (int(Rate),numpy.log(Rate)/numpy.log(10)))
-            print("Pi estimation %.8f" % (4./NewIterations*Inside))
-
-        avgD=numpy.append(avgD,numpy.average(Duration))
-        medD=numpy.append(medD,numpy.median(Duration))
-        stdD=numpy.append(stdD,numpy.std(Duration))
-        minD=numpy.append(minD,numpy.min(Duration))
-        maxD=numpy.append(maxD,numpy.max(Duration))
-        avgR=numpy.append(avgR,numpy.average(Rate))
-        medR=numpy.append(medR,numpy.median(Rate))
-        stdR=numpy.append(stdR,numpy.std(Rate))
-        minR=numpy.append(minR,numpy.min(Rate))
-        maxR=numpy.append(maxR,numpy.max(Rate))
-
-        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))
-        
-        numpy.savez("Pi_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
-        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
-        numpy.savetxt("Pi_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
+                    InputCL = {}
+                    InputCL["Iterations"] = Iterations
+                    InputCL["Steps"] = Redo
+                    InputCL["Blocks"] = Blocks
+                    InputCL["Threads"] = Threads
+                    InputCL["Device"] = Devices[0]
+                    InputCL["RNG"] = RNG
+                    InputCL["Seeds"] = Seeds
+                    InputCL["ValueType"] = ValueType
+                    InputCL["IfThen"] = IfThen
+                    OutputCL = MetropolisOpenCL(InputCL)
+                    Inside = OutputCL["Inside"]
+                    NewIterations = OutputCL["NewIterations"]
+                    Duration = OutputCL["Duration"]
+                except Exception:
+                    print(
+                        "Problem with (%i,%i) // computations on OpenCL"
+                        % (Blocks, Threads)
+                    )
+            Rate = NewIterations / Duration[-1]
+            print(
+                "Itops %i\nLogItops %.2f "
+                % (int(Rate), numpy.log(Rate) / numpy.log(10))
+            )
+            print("Pi estimation %.8f" % (4.0 / NewIterations * Inside))
+
+        avgD = numpy.append(avgD, numpy.average(Duration))
+        medD = numpy.append(medD, numpy.median(Duration))
+        stdD = numpy.append(stdD, numpy.std(Duration))
+        minD = numpy.append(minD, numpy.min(Duration))
+        maxD = numpy.append(maxD, numpy.max(Duration))
+        avgR = numpy.append(avgR, numpy.average(Rate))
+        medR = numpy.append(medR, numpy.median(Rate))
+        stdR = numpy.append(stdR, numpy.std(Rate))
+        minR = numpy.append(minR, numpy.min(Rate))
+        maxR = numpy.append(maxR, numpy.max(Rate))
+
+        print(
+            "%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i"
+            % (
+                avgD[-1],
+                medD[-1],
+                stdD[-1],
+                minD[-1],
+                maxD[-1],
+                avgR[-1],
+                medR[-1],
+                stdR[-1],
+                minR[-1],
+                maxR[-1],
+            )
+        )
+
+        numpy.savez(
+            "Pi_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s"
+            % (
+                ValueType,
+                RNG,
+                Alu[Devices[0]],
+                GpuStyle,
+                BlocksBegin,
+                BlocksEnd,
+                ThreadsBegin,
+                ThreadsEnd,
+                Iterations,
+                Devices[0],
+                Metrology,
+                gethostname(),
+            ),
+            (
+                ExploredBlocks,
+                ExploredThreads,
+                avgD,
+                medD,
+                stdD,
+                minD,
+                maxD,
+                avgR,
+                medR,
+                stdR,
+                minR,
+                maxR,
+            ),
+        )
+        ToSave = [
+            ExploredBlocks,
+            ExploredThreads,
+            avgD,
+            medD,
+            stdD,
+            minD,
+            maxD,
+            avgR,
+            medR,
+            stdR,
+            minR,
+            maxR,
+        ]
+        numpy.savetxt(
+            "Pi_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s"
+            % (
+                ValueType,
+                RNG,
+                Alu[Devices[0]],
+                GpuStyle,
+                BlocksBegin,
+                BlocksEnd,
+                ThreadsBegin,
+                ThreadsEnd,
+                Iterations,
+                Devices[0],
+                Metrology,
+                gethostname(),
+            ),
+            numpy.transpose(ToSave),
+            fmt="%i %i %e %e %e %e %e %i %i %i %i %i",
+        )
 
     if Fit:
-        FitAndPrint(ExploredJobs,median,Curves)
+        FitAndPrint(ExploredJobs, median, Curves)  # noqa: F821, E501 # FIXME: undefined var 'median'
diff --git a/examples/transpose.py b/examples/transpose.py
index 6b06a98802eda2e26f4ad3ffb86cd7c761abd87c..d8915343ca222ee4173fc44209572cf55bb392f2 100644
--- a/examples/transpose.py
+++ b/examples/transpose.py
@@ -6,18 +6,16 @@ import numpy
 import numpy.linalg as la
 
 
-
-
 block_size = 16
 
 
-
-
 class NaiveTranspose:
     def __init__(self, ctx):
-        self.kernel = cl.Program(ctx, """
-        __kernel
-        void transpose(
+        self.kernel = (
+            cl.Program(
+                ctx,
+                """
+        __kernel void transpose(
           __global float *a_t, __global float *a,
           unsigned a_width, unsigned a_height)
         {
@@ -26,17 +24,25 @@ class NaiveTranspose:
 
           a_t[write_idx] = a[read_idx];
         }
-        """% {"block_size": block_size}).build().transpose
+        """,)
+            .build()
+            .transpose
+        )
 
     def __call__(self, queue, tgt, src, shape):
         w, h = shape
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h), (block_size, block_size),
-            tgt, src, numpy.uint32(w), numpy.uint32(h))
-
-
+        return self.kernel(
+            queue,
+            (w, h),
+            (block_size, block_size),
+            tgt,
+            src,
+            numpy.uint32(w),
+            numpy.uint32(h),
+        )
 
 
 class SillyTranspose(NaiveTranspose):
@@ -45,15 +51,17 @@ class SillyTranspose(NaiveTranspose):
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h), None,
-            tgt, src, numpy.uint32(w), numpy.uint32(h))
-
-
+        return self.kernel(
+            queue, (w, h), None, tgt, src, numpy.uint32(w), numpy.uint32(h)
+        )
 
 
 class TransposeWithLocal:
     def __init__(self, ctx):
-        self.kernel = cl.Program(ctx, """
+        self.kernel = (
+            cl.Program(
+                ctx,
+                """
         #define BLOCK_SIZE %(block_size)d
         #define A_BLOCK_STRIDE (BLOCK_SIZE * a_width)
         #define A_T_BLOCK_STRIDE (BLOCK_SIZE * a_height)
@@ -71,8 +79,10 @@ class TransposeWithLocal:
             get_group_id(1) * BLOCK_SIZE +
             get_group_id(0) * A_T_BLOCK_STRIDE;
 
-          int glob_idx_a   = base_idx_a + get_local_id(0) + a_width * get_local_id(1);
-          int glob_idx_a_t = base_idx_a_t + get_local_id(0) + a_height * get_local_id(1);
+          int glob_idx_a   =
+            base_idx_a + get_local_id(0) + a_width * get_local_id(1);
+          int glob_idx_a_t =
+            base_idx_a_t + get_local_id(0) + a_height * get_local_id(1);
 
           a_local[get_local_id(1)*BLOCK_SIZE+get_local_id(0)] = a[glob_idx_a];
 
@@ -80,18 +90,28 @@ class TransposeWithLocal:
 
           a_t[glob_idx_a_t] = a_local[get_local_id(0)*BLOCK_SIZE+get_local_id(1)];
         }
-        """% {"block_size": block_size}).build().transpose
+        """
+                % {"block_size": block_size},
+            )
+            .build()
+            .transpose
+        )
 
     def __call__(self, queue, tgt, src, shape):
         w, h = shape
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h), (block_size, block_size),
-            tgt, src, numpy.uint32(w), numpy.uint32(h),
-            cl.LocalMemory(4*block_size*(block_size+1)))
-
-
+        return self.kernel(
+            queue,
+            (w, h),
+            (block_size, block_size),
+            tgt,
+            src,
+            numpy.uint32(w),
+            numpy.uint32(h),
+            cl.LocalMemory(4 * block_size * (block_size + 1)),
+        )
 
 
 def transpose_using_cl(ctx, queue, cpu_src, cls):
@@ -110,9 +130,6 @@ def transpose_using_cl(ctx, queue, cpu_src, cls):
     return result
 
 
-
-
-
 def check_transpose():
     for cls in [NaiveTranspose, SillyTranspose, TransposeWithLocal]:
         print("checking", cls.__name__)
@@ -124,7 +141,7 @@ def check_transpose():
         queue = cl.CommandQueue(ctx)
 
         for i in numpy.arange(10, 13, 0.125):
-            size = int(((2**i) // 32) * 32)
+            size = int(((2 ** i) // 32) * 32)
             print(size)
 
             source = numpy.random.rand(size, size).astype(numpy.float32)
@@ -136,20 +153,18 @@ def check_transpose():
             assert err_norm == 0, (size, err_norm)
 
 
-
-
 def benchmark_transpose():
     ctx = cl.create_some_context()
 
     for dev in ctx.devices:
         assert dev.local_mem_size > 0
 
-    queue = cl.CommandQueue(ctx,
-            properties=cl.command_queue_properties.PROFILING_ENABLE)
+    queue = cl.CommandQueue(
+        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
+    )
 
-    sizes = [int(((2**i) // 32) * 32)
-            for i in numpy.arange(10, 13, 0.125)]
-            #for i in numpy.arange(10, 10.5, 0.125)]
+    sizes = [int(((2 ** i) // 32) * 32) for i in numpy.arange(10, 13, 0.125)]
+    # for i in numpy.arange(10, 10.5, 0.125)]
 
     mem_bandwidths = {}
 
@@ -168,36 +183,44 @@ def benchmark_transpose():
             a_t_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=source.nbytes)
             method = cls(ctx)
 
-            for i in range(4):
+            for _i in range(4):
                 method(queue, a_t_buf, a_buf, source.shape)
 
             count = 12
             events = []
-            for i in range(count):
+            for _i in range(count):
                 events.append(method(queue, a_t_buf, a_buf, source.shape))
 
             events[-1].wait()
             time = sum(evt.profile.end - evt.profile.start for evt in events)
 
-            mem_bw = 2*source.nbytes*count/(time*1e-9)
-            print("benchmarking", name, size, mem_bw/1e9, "GB/s")
+            mem_bw = 2 * source.nbytes * count / (time * 1e-9)
+            print("benchmarking", name, size, mem_bw / 1e9, "GB/s")
             meth_mem_bws.append(mem_bw)
 
             a_buf.release()
             a_t_buf.release()
 
     try:
-        from matplotlib.pyplot import clf, plot, title, xlabel, ylabel, \
-                savefig, legend, grid
+        from matplotlib.pyplot import (
+            clf,
+            plot,
+            xlabel,
+            ylabel,
+            savefig,
+            legend,
+            grid,
+        )
     except ModuleNotFoundError:
         pass
     else:
         for i in range(len(methods)):
             clf()
-            for j in range(i+1):
+            for j in range(i + 1):
                 method = methods[j]
                 name = method.__name__.replace("Transpose", "")
-                plot(sizes, numpy.array(mem_bandwidths[method])/1e9, "o-", label=name)
+                plot(sizes, numpy.array(mem_bandwidths[method]) / 1e9, "o-",
+                        label=name)
 
             xlabel("Matrix width/height $N$")
             ylabel("Memory Bandwidth [GB/s]")
@@ -209,4 +232,3 @@ def benchmark_transpose():
 
 check_transpose()
 benchmark_transpose()
-
diff --git a/setup.cfg b/setup.cfg
index 845fb8c484af1df89a79136b4bd54f97d89de4c1..59c86e50063bdd72882cd937a252acdf6e8e5ef4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,12 @@
 [flake8]
 ignore = E126,E127,E128,E123,E226,E241,E242,E265,W503,E402
 max-line-length=85
-exclude=pyopencl/compyte/ndarray,pyopencl/compyte/array.py
+exclude=pyopencl/compyte/ndarray,pyopencl/compyte/array.py,gl_particle_animation.py,gl_interop_demo.py
+
+per-file-ignores=
+        examples/pi-monte-carlo.py:N,B
+        examples/black-hole-accretion.py:N
+        examples/n-body.py:N
 
 inline-quotes = "
 docstring-quotes = """