Tile rendering with OpenGL

Let's start by considering this simple snippet:

import ctypes
import textwrap
import time

import glfw
import numpy as np
from OpenGL.GL import *
from OpenGL.GLU import *
from OpenGL.GLUT import *

import glm

# OpenGL context version requested from GLUT/GLFW, as (major, minor).
CONTEXT_VERSION = (4, 1)

# "#version" directive prepended to every shader source. It is derived from
# CONTEXT_VERSION (for OpenGL >= 3.3 the GLSL version is major*100 + minor*10)
# so the shaders never ask for a newer language version than the requested
# context guarantees. A hard-coded "#version 440" fails to compile on drivers
# that hand back exactly the requested 4.1 context.
GLSL_VERSION = "#version {}{}0\n".format(*CONTEXT_VERSION)


def vs_shader(text):
    """Return vertex shader source: the GLSL version line plus dedented *text*."""
    body = textwrap.dedent(text)
    return GLSL_VERSION + body


def shader(text):
    """Wrap a Shadertoy-style ``mainImage`` body into a full fragment shader.

    The result is the GLSL version line, the uniform/output declarations,
    the dedented *text*, and a ``main`` that forwards
    ``gl_FragCoord.xy + iOffset`` to ``mainImage``.
    """
    declarations = textwrap.dedent("""\
        uniform float iTime;
        uniform int iFrame;
        uniform vec3 iResolution;
        uniform sampler2D iChannel0;
        uniform vec2 iOffset;
        out vec4 frag_color;
    """)
    entry_point = textwrap.dedent("""\
        void main() {
            mainImage(frag_color, gl_FragCoord.xy + iOffset);
        }
    """)

    user_code = textwrap.dedent(text)
    return GLSL_VERSION + declarations + user_code + entry_point


# Vertex shader source used for every pass: it multiplies the vertex position
# (attribute location 0) by the `mvp` uniform.
VS = vs_shader("""\
    layout(location = 0) in vec3 in_position;

    uniform mat4 mvp;

    void main()
    {
        gl_Position = mvp * vec4(in_position, 1.0f);
    }
""")

# Two-pass demo effect (fragment shader sources, in pass order).
SIMPLE = [
    # Pass 0: checkerboard over uv = fragCoord / iResolution (4 cells per
    # axis), with a different colour mix in each uv quadrant.
    shader("""
        void mainImage( out vec4 fragColor, in vec2 fragCoord )
        {
            vec2 uv = fragCoord.xy / iResolution.xy;
            float tile_size = 4;
            vec2 g = floor(vec2(tile_size, tile_size) * uv);
            float c = mod(g.x + g.y, 2.0);
            if (uv.x<0.5 && uv.y<0.5)
                fragColor = vec4(mix(vec3(c), vec3(1), vec3(1,0,1)), 1.0);
            else if (uv.x>=0.5 && uv.y<0.5)
                fragColor = vec4(mix(vec3(c), vec3(1), vec3(1,0,0)), 1.0);
            else if (uv.x<0.5 && uv.y>=0.5)
                fragColor = vec4(mix(vec3(c), vec3(1), vec3(0,1,0)), 1.0);
            else if (uv.x>=0.5 && uv.y>=0.5)
                fragColor = vec4(mix(vec3(c), vec3(1), vec3(0,0,1)), 1.0);
        }
    """),
    # Pass 1: copies iChannel0, sampled at uv = fragCoord / iResolution, to
    # the output with alpha forced to 1.
    shader("""
        void mainImage( out vec4 fragColor, in vec2 fragCoord )
        {
            vec2 uv = fragCoord/iResolution.xy;
            fragColor = vec4(texture(iChannel0, uv).rgb,1.0);
        }
    """)
]


# -------- MINIFRAMEWORK --------
class Tiler:
    """Describes how a scene of a given size is split into a grid of tiles.

    Instances built through ``from_num_tiles`` or ``from_size`` carry
    ``num_tiles_x``, ``num_tiles_y``, ``tile_width`` and ``tile_height``.
    The plain constructor only stores the scene size.
    """

    def __init__(self, scene_width, scene_height):
        self.scene_width, self.scene_height = scene_width, scene_height

    @classmethod
    def from_num_tiles(cls, scene_width, scene_height, num_tiles_x, num_tiles_y):
        """Build a tiler from a tile count; tile size is the floor division."""
        tiler = cls(scene_width, scene_height)
        tiler.num_tiles_x, tiler.num_tiles_y = num_tiles_x, num_tiles_y
        tiler.tile_width = tiler.scene_width // num_tiles_x
        tiler.tile_height = tiler.scene_height // num_tiles_y
        return tiler

    @classmethod
    def from_size(cls, scene_width, scene_height, tile_width, tile_height):
        """Build a tiler from a tile size; tile count is the floor division."""
        tiler = cls(scene_width, scene_height)
        tiler.num_tiles_x = tiler.scene_width // tile_width
        tiler.num_tiles_y = tiler.scene_height // tile_height
        tiler.tile_width, tiler.tile_height = tile_width, tile_height
        return tiler

    @property
    def num_tiles(self):
        """Total number of tiles in the grid."""
        return self.num_tiles_x * self.num_tiles_y


class TextureF32:
    """2D RGBA32F texture allocated without initial data.

    Wrapping is clamp-to-edge on both axes and filtering is nearest.
    """

    def __init__(self, width, height):
        self.target = GL_TEXTURE_2D
        self.identifier = glGenTextures(1)

        glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
        self.bind()
        glTexImage2D(self.target, 0, GL_RGBA32F, width, height, 0, GL_RGBA, GL_FLOAT, None)
        for wrap_axis in (GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T):
            glTexParameteri(self.target, wrap_axis, GL_CLAMP_TO_EDGE)
        self.set_filter()

        self.unbind()

    def set_filter(self):
        """Set nearest filtering on whatever texture is bound to the target."""
        for filter_kind in (GL_TEXTURE_MAG_FILTER, GL_TEXTURE_MIN_FILTER):
            glTexParameteri(self.target, filter_kind, GL_NEAREST)

    def bind(self):
        """Bind this texture to its target."""
        glBindTexture(self.target, self.identifier)

    def unbind(self):
        """Bind texture 0 to the target."""
        glBindTexture(self.target, 0)


class FboF32():
    """Framebuffer object with one RGBA32F colour texture attachment.

    Attributes:
        target: always ``GL_FRAMEBUFFER``.
        identifier: the GL framebuffer name.
        colors: list holding the single ``TextureF32`` colour attachment.
        width, height: size of the colour attachment in pixels.
    """

    def __init__(self, width, height):
        """Create the framebuffer and its colour texture.

        The framebuffer binding is reset to 0 on success.

        Raises:
            Exception: if the framebuffer is not complete.
        """
        self.target = GL_FRAMEBUFFER
        self.identifier = glGenFramebuffers(1)
        glBindFramebuffer(GL_FRAMEBUFFER, self.identifier)

        # Color attachments
        tex = TextureF32(width, height)
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex.identifier, 0)
        glDrawBuffers(1, [GL_COLOR_ATTACHMENT0])
        self.colors = [tex]

        self.width = width
        self.height = height

        if glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE:
            raise Exception(
                f"ERROR::FRAMEBUFFER:: Framebuffer {self.identifier} is not complete!"
            )

        glBindFramebuffer(GL_FRAMEBUFFER, 0)

    def delete(self):
        """Delete the GL framebuffer object."""
        # glDeleteFramebuffers is a module-level GL function, not an attribute
        # of this class; the former `self.glDeleteFramebuffers(...)` raised
        # AttributeError on every call.
        glDeleteFramebuffers(1, [self.identifier])

    def rect(self):
        """Return ``[x, y, width, height]`` covering the whole framebuffer."""
        return [0, 0, self.width, self.height]

    def bind(self):
        """Bind this framebuffer as the current ``GL_FRAMEBUFFER``."""
        glBindFramebuffer(GL_FRAMEBUFFER, self.identifier)


def set_uniform1f(prog, name, v0):
    """Set the float uniform *name* of program *prog* to *v0*."""
    location = glGetUniformLocation(prog, name)
    glUniform1f(location, v0)


def set_uniform1i(prog, name, v0):
    """Set the int uniform *name* of program *prog* to *v0*."""
    location = glGetUniformLocation(prog, name)
    glUniform1i(location, v0)


def set_uniform2i(prog, name, v0, v1):
    """Set the two-component int uniform *name* of *prog* to (*v0*, *v1*)."""
    location = glGetUniformLocation(prog, name)
    glUniform2i(location, v0, v1)


def set_uniform2f(prog, name, v0, v1):
    """Set the two-component float uniform *name* of *prog* to (*v0*, *v1*)."""
    location = glGetUniformLocation(prog, name)
    glUniform2f(location, v0, v1)


def set_uniform3f(prog, name, v0, v1, v2):
    """Set the three-component float uniform *name* of *prog*."""
    location = glGetUniformLocation(prog, name)
    glUniform3f(location, v0, v1, v2)


def set_uniform_mat4(prog, name, mat):
    """Upload the glm matrix *mat* to the mat4 uniform *name* (no transpose)."""
    location = glGetUniformLocation(prog, name)
    glUniformMatrix4fv(location, 1, GL_FALSE, glm.value_ptr(mat))


def set_uniform_texture(prog, name, resource, unit_texture):
    """Bind *resource* to texture unit *unit_texture* and point *name* at it.

    *resource* must provide ``bind()`` and ``set_filter()``; both are called
    while the requested unit is active.
    """
    glActiveTexture(GL_TEXTURE0 + unit_texture)
    resource.bind()
    resource.set_filter()
    location = glGetUniformLocation(prog, name)
    glUniform1i(location, unit_texture)


def create_quad(x0, y0, x1, y1):
    """Create a VAO for the rectangle (x0, y0)-(x1, y1) at z = 0.

    The rectangle is stored as two triangles (six float32 xyz vertices) and
    exposed as vertex attribute 0. The new VAO is left bound and its name is
    returned.
    """
    p00 = (x0, y0, 0)
    p10 = (x1, y0, 0)
    p01 = (x0, y1, 0)
    p11 = (x1, y1, 0)
    # First triangle: p00, p10, p01; second triangle: p10, p11, p01.
    vertices = np.array(p00 + p10 + p01 + p10 + p11 + p01, dtype=np.float32)

    buffer_id = glGenBuffers(1)
    glBindBuffer(GL_ARRAY_BUFFER, buffer_id)
    glBufferData(GL_ARRAY_BUFFER, vertices, GL_STATIC_DRAW)

    vertex_array = glGenVertexArrays(1)
    glBindVertexArray(vertex_array)
    glVertexAttribPointer(0, 3, GL_FLOAT, False, 0, ctypes.c_void_p(0))
    glEnableVertexAttribArray(0)

    return vertex_array


def compile(shader_type, source):
    """Compile *source* as a shader of *shader_type* and return its GL name.

    On failure the numbered source is printed and an ``Exception`` carrying
    the decoded shader info log is raised.
    """
    shader_id = glCreateShader(shader_type)
    glShaderSource(shader_id, source)
    glCompileShader(shader_id)

    if glGetShaderiv(shader_id, GL_COMPILE_STATUS):
        return shader_id

    # Dump the source with 1-based line numbers so the log can be matched up.
    for number, line in enumerate(source.splitlines(), start=1):
        print(f"{number}: {line}")
    raise Exception(glGetShaderInfoLog(shader_id).decode("utf-8"))


def create_program(vs, fs):
    """Compile and link a program from vertex source *vs* and fragment source *fs*.

    Returns:
        The GL name of the linked program.

    Raises:
        Exception: from ``compile`` if either shader fails to compile.
        RuntimeError: with the program info log if linking fails.
    """
    vs_identifier = compile(GL_VERTEX_SHADER, vs)
    fs_identifier = compile(GL_FRAGMENT_SHADER, fs)

    program = glCreateProgram()
    glAttachShader(program, vs_identifier)
    glAttachShader(program, fs_identifier)
    glLinkProgram(program)
    linked = glGetProgramiv(program, GL_LINK_STATUS)

    # The shader objects are no longer needed once linking has been attempted.
    # Deleting an attached shader only flags it; it is released together with
    # the program, so this cannot invalidate a successfully linked program.
    glDeleteShader(vs_identifier)
    glDeleteShader(fs_identifier)

    if not linked:
        log = glGetProgramInfoLog(program)
        # Report text rather than raw bytes, consistent with compile().
        if isinstance(log, bytes):
            log = log.decode("utf-8", errors="replace")
        glDeleteProgram(program)
        raise RuntimeError(log)

    return program


# -------- Glut/Glfw --------
class Effect:
    """Two-pass effect rendered either in one piece or split into tiles.

    Pass 0 renders into an off-screen float FBO while reading the FBO written
    on the previous frame; pass 1 draws the result to the default
    framebuffer. The two FBO sets are swapped ("ping-pong") after each frame.
    """

    def __init__(self, w, h, num_tiles_x, num_tiles_y, passes):
        """Build the tiler, compile one program per entry of *passes* and
        create the quad and the view/model matrices.

        GL objects are created here, so a current OpenGL context is
        presumably required by the caller.
        """
        self.fbos = []
        # Never set to False anywhere in this class.
        self.needs_updating = True
        # Only read (in the log message); never incremented in this class.
        self.allocations = 0
        self.tiler = Tiler.from_num_tiles(w, h, num_tiles_x, num_tiles_y)

        self.passes = [create_program(VS, rp) for rp in passes]
        self.iframe = 0
        self.start_time = time.time()

        self.quad = create_quad(-1, -1, 1, 1)
        self.view = glm.lookAt(
            glm.vec3(0, 0, 10),
            glm.vec3(0, 0, 0),
            glm.vec3(0, 1, 0)
        )
        self.model = glm.mat4(1)
        glEnable(GL_DEPTH_TEST)

        # print("GL_MAX_VIEWPORT_DIMS:", glGetIntegerv(GL_MAX_VIEWPORT_DIMS))
        # print("GL_MAX_TEXTURE_SIZE:", glGetIntegerv(GL_MAX_TEXTURE_SIZE))
        # print("GL_MAX_RENDERBUFFER_SIZE:", glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE))

    def mem_info(self):
        """Return a string with total and currently available GPU memory.

        NOTE(review): the two enums are NVX (vendor extension) values;
        presumably the queries fail on drivers that lack the extension -
        confirm before relying on this outside NVIDIA hardware.
        """
        GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX = 0x9048
        GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX = 0x9049
        total_mem_kb = glGetIntegerv(GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX)
        cur_avail_mem_kb = glGetIntegerv(GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX)
        return f"total_mem_kb={total_mem_kb} cur_avail_mem_kb={cur_avail_mem_kb}"

    def create_fbo(self, tiler):
        """Return a list with one tile-sized FboF32 per tile of *tiler*."""
        return [
            FboF32(width=tiler.tile_width, height=tiler.tile_height)
            for i in range(tiler.num_tiles)
        ]

    def make_ortho(self, x, y, num_tiles_x, num_tiles_y, left, right, bottom, top, near, far):
        """Return an orthographic projection covering only tile (x, y).

        The full [left, right] x [bottom, top] range is divided evenly into
        num_tiles_x by num_tiles_y cells and the bounds of cell (x, y) are
        used. The bounds are also printed on every call. This method is not
        called anywhere else in this class.
        """
        # References
        #
        # https://www.opengl.org/archives/resources/code/samples/advanced/advanced97/notes/node20.html
        # https://stackoverflow.com/questions/6490728/capture-snapshot-of-opengl-window-with-very-large-image-resolution
        #
        offset_x = (right - left) / num_tiles_x
        offset_y = (top - bottom) / num_tiles_y
        l = left + offset_x * x
        r = left + offset_x * (x + 1)
        b = bottom + offset_y * y
        t = bottom + offset_y * (y + 1)
        n = near
        f = far
        print(f"x={x} y={y} left={l} right={r} bottom={b} top={t}")
        return glm.ortho(l, r, b, t, n, f)

    def render_pass(self, rp, mvp, w, h, channel0, offset_x=0, offset_y=0):
        """Draw the quad with program *rp* into the currently bound framebuffer.

        Sets the mvp, iTime, iFrame, iResolution (w, h, w / h) and iOffset
        uniforms. If *channel0* is truthy it is bound as iChannel0 on the next
        free texture unit.
        """
        t = time.time() - self.start_time

        glBindVertexArray(self.quad)
        glUseProgram(rp)
        set_uniform_mat4(rp, "mvp", mvp)
        set_uniform1f(rp, "iTime", t)
        set_uniform1i(rp, "iFrame", self.iframe)
        set_uniform3f(rp, "iResolution", w, h, w / h)
        set_uniform2f(rp, "iOffset", offset_x, offset_y)
        if channel0:
            set_uniform_texture(rp, "iChannel0", channel0, self.active_texture)
            # NOTE(review): the unit index grows by one per textured draw and
            # is only reset at the start of a frame, so a frame with many
            # tiles uses many texture units - confirm this stays below the
            # implementation limit.
            self.active_texture += 1
        glDrawArrays(GL_TRIANGLES, 0, 6)

    # No tile rendering
    def render_no_tiles(self, window_width, window_height):
        """Render one frame using two window-sized FBOs."""
        self.active_texture = 0

        # Lazily create the two ping-pong FBOs on the first call.
        if self.needs_updating:
            if not self.fbos:
                print(f"Creating fbos, allocations={self.allocations} {self.mem_info()}")
                self.fbos = [
                    FboF32(width=window_width, height=window_height),
                    FboF32(width=window_width, height=window_height)
                ]

        # clear buffers (only while iframe is 0, i.e. first frame or after a reset)
        if self.iframe == 0:
            for fbo in self.fbos:
                fbo.bind()
                glViewport(*fbo.rect())
                glClearColor(0, 0, 0, 0)
                glClear(GL_COLOR_BUFFER_BIT)

        proj = glm.ortho(-1, 1, -1, 1, -100, 100)
        mvp = proj * self.view * self.model

        # Pass0: BufferA - Channels [BufferA, None, None, None]
        # Writes fbos[0] while sampling fbos[1].
        fbo0 = self.fbos[0]
        fbo1 = self.fbos[1]
        w, h = fbo0.width, fbo0.height
        rp = self.passes[0]
        fbo0.bind()
        glViewport(0, 0, w, h)
        self.render_pass(rp, mvp, w, h, fbo1.colors[0])

        # Pass1: Image - Channels [BufferA, None, None, None]
        # Draws the texture just written by pass 0 to the default framebuffer.
        glBindFramebuffer(GL_FRAMEBUFFER, 0)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        fbo0 = self.fbos[0]
        w, h = window_width, window_height
        rp = self.passes[1]
        glViewport(0, 0, w, h)
        self.render_pass(rp, mvp, w, h, fbo0.colors[0])

        # ping-pong: the FBO written this frame becomes next frame's input
        self.fbos.reverse()

        self.iframe += 1

    # Tile rendering
    def render_tiles(self, window_width, window_height):
        """Render one frame tile by tile, using one small FBO per tile.

        NOTE(review): this is the tile path that the surrounding document
        reports as producing wrong output for larger tile counts; see the
        notes on the uniform values below.
        """
        M = self.tiler.num_tiles_x
        N = self.tiler.num_tiles_y
        # Pixel offset of tile (x, y) inside the window: (offset_x * x, offset_y * y).
        offset_x = window_width // M
        offset_y = window_height // N
        proj = glm.ortho(-1, 1, -1, 1, -100, 100)

        # -------- Test --------
        # glBindFramebuffer(GL_FRAMEBUFFER, 0)
        # glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # self.active_texture = 0

        # for y in range(N):
        #     for x in range(M):
        #         w, h = window_width, window_height
        #         mvp = proj * self.view * self.model
        #         glViewport(offset_x * x, offset_y * y, self.tiler.tile_width, self.tiler.tile_height)
        #         self.render_pass(self.passes[0], mvp, w, h, None, offset_x * x, offset_y * y)

        # return

        # -------- Test2 --------
        self.active_texture = 0

        # Lazily create two ping-pong sets of per-tile FBOs on the first call.
        if self.needs_updating:
            if not self.fbos:
                print(f"Creating fbos, allocations={self.allocations} {self.mem_info()}")
                self.fbos = [
                    self.create_fbo(self.tiler),
                    self.create_fbo(self.tiler),
                ]

        # clear buffers (only while iframe is 0, i.e. first frame or after a reset)
        if self.iframe == 0:
            for fbo_tiles in self.fbos:
                for fbo in fbo_tiles:
                    fbo.bind()
                    glViewport(*fbo.rect())
                    glClearColor(0, 0, 0, 0)
                    glClear(GL_COLOR_BUFFER_BIT)

        # Pass0: BufferA - Channels [BufferA, None, None, None]
        # Tiles are stored row-major: index = y * M + x.
        for y in range(N):
            for x in range(M):
                fbo0 = self.fbos[0][y * M + x]
                fbo1 = self.fbos[1][y * M + x]
                # NOTE(review): iResolution is given the tile FBO size here;
                # the Solution section of this document states it has to be
                # the window size for this pass. `aspect` is never used.
                w, h, aspect = fbo0.width, fbo0.height, fbo0.width / fbo0.height
                mvp = proj * self.view * self.model
                rp = self.passes[0]
                fbo0.bind()
                glViewport(0, 0, self.tiler.tile_width, self.tiler.tile_height)
                self.render_pass(rp, mvp, w, h, fbo1.colors[0], offset_x * x, offset_y * y)

        # Pass1: Image - Channels [BufferA, None, None, None]
        glBindFramebuffer(GL_FRAMEBUFFER, 0)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

        for y in range(N):
            for x in range(M):
                fbo0 = self.fbos[0][y * M + x]
                fbo1 = self.fbos[1][y * M + x]
                # NOTE(review): iResolution is the window size and iOffset is
                # (0, 0) here; the Solution section of this document states
                # they have to be the tile size and the negated tile offset
                # for this pass. `fbo1` and `aspect` are never used.
                w, h, aspect = window_width, window_height, window_width / window_height
                mvp = proj * self.view * self.model
                rp = self.passes[1]
                glViewport(offset_x * x, offset_y * y, self.tiler.tile_width, self.tiler.tile_height)
                self.render_pass(rp, mvp, w, h, fbo0.colors[0], 0, 0)

        # ping-pong: the tile set written this frame becomes next frame's input
        self.fbos.reverse()

        self.iframe += 1


class WindowGlut:
    """GLUT window that renders an ``Effect`` on every display callback.

    Holding the ``r`` key resets the effect's frame counter. The window title
    shows the number of frames drawn during the last second.
    """

    def __init__(self, w, h, use_tiles, num_tiles_x, num_tiles_y, passes):
        """Create the window and GL context, then build the effect.

        Args:
            w, h: initial window size in pixels.
            use_tiles: render with ``Effect.render_tiles`` when true,
                otherwise with ``Effect.render_no_tiles``.
            num_tiles_x, num_tiles_y: tile grid passed to the effect.
            passes: fragment shader sources, one per pass.
        """
        glutInit()
        glutInitContextVersion(*CONTEXT_VERSION)
        glutInitContextProfile(GLUT_CORE_PROFILE)
        glutInitContextFlags(GLUT_FORWARD_COMPATIBLE)
        glutSetOption(GLUT_MULTISAMPLE, 16)
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH | GLUT_MULTISAMPLE)
        glutInitWindowSize(w, h)
        glutCreateWindow('Mcve')

        # State read by the callbacks is set up before they are registered.
        # The size is seeded from the requested size so display() does not
        # depend on a reshape callback having fired first (WindowGlfw already
        # initialises these the same way).
        self.keys = {chr(i): False for i in range(256)}
        self.window_width = w
        self.window_height = h

        glutReshapeFunc(self.reshape)
        glutKeyboardFunc(self.keyboard_func)
        glutKeyboardUpFunc(self.keyboard_up_func)
        glutDisplayFunc(self.display)
        glutIdleFunc(self.idle_func)
        self.effect = Effect(w, h, num_tiles_x, num_tiles_y, passes)

        self.start_time = time.time()
        self.num_frames = 0

        if use_tiles:
            print("TILE RENDERING ENABLED")
            self.render = self.effect.render_tiles
        else:
            print("TILE RENDERING DISABLED")
            self.render = self.effect.render_no_tiles

    def keyboard_func(self, *args):
        """Mark the key in ``args[0]`` (a bytes object) as pressed."""
        # latin-1 maps every byte value to chr(value), matching the keys of
        # self.keys; utf8 raised UnicodeDecodeError for bytes >= 0x80.
        self.keys[args[0].decode("latin-1")] = True

    def keyboard_up_func(self, *args):
        """Mark the key in ``args[0]`` (a bytes object) as released."""
        self.keys[args[0].decode("latin-1")] = False

    def display(self):
        """Render one frame, swap buffers and update the FPS title."""
        if self.keys['r']:
            self.effect.iframe = 0

        self.render(self.window_width, self.window_height)

        glutSwapBuffers()
        self.num_frames += 1

        # Once a second, publish the frame count and restart the counter.
        t = time.time() - self.start_time
        if t >= 1:
            glutSetWindowTitle(f"Fps: {self.num_frames}")
            self.start_time = time.time()
            self.num_frames = 0

    def run(self):
        """Enter the GLUT main loop."""
        glutMainLoop()

    def idle_func(self):
        """Request a redraw whenever GLUT is idle."""
        glutPostRedisplay()

    def reshape(self, w, h):
        """Record the new window size and reset the viewport to it."""
        glViewport(0, 0, w, h)
        self.window_width = w
        self.window_height = h


class WindowGlfw:
    """GLFW window that renders an ``Effect`` once per loop iteration.

    Holding the ``R`` key resets the effect's frame counter.
    """

    def __init__(self, w, h, use_tiles, num_tiles_x, num_tiles_y, passes):
        """Create the window and GL context, then build the effect.

        Args:
            w, h: initial window size in pixels.
            use_tiles: render with ``Effect.render_tiles`` when true,
                otherwise with ``Effect.render_no_tiles``.
            num_tiles_x, num_tiles_y: tile grid passed to the effect.
            passes: fragment shader sources, one per pass.

        Raises:
            RuntimeError: if GLFW cannot be initialised or the window cannot
                be created. A bare ``return`` here used to leave a
                half-built object that failed later in ``run()`` with an
                unrelated AttributeError.
        """
        # Initialize the library
        if not glfw.init():
            raise RuntimeError("glfw.init() failed")

        # Create a windowed mode window and its OpenGL context
        glfw.window_hint(glfw.CONTEXT_VERSION_MAJOR, CONTEXT_VERSION[0])
        glfw.window_hint(glfw.CONTEXT_VERSION_MINOR, CONTEXT_VERSION[1])
        glfw.window_hint(glfw.OPENGL_FORWARD_COMPAT, GL_TRUE)
        glfw.window_hint(glfw.OPENGL_PROFILE, glfw.OPENGL_CORE_PROFILE)
        window = glfw.create_window(w, h, "Mcve", None, None)
        if not window:
            glfw.terminate()
            raise RuntimeError("glfw.create_window() failed")

        # NOTE(review): this tracks the window size; on high-DPI displays the
        # framebuffer size may differ - confirm whether the framebuffer size
        # callback should be used for the viewport instead.
        glfw.set_window_size_callback(window, self.reshape)
        glfw.set_key_callback(window, self.keyboard_func)

        # Make the window's context current
        glfw.make_context_current(window)
        self.window = window

        self.keys = {chr(i): False for i in range(256)}
        self.effect = Effect(w, h, num_tiles_x, num_tiles_y, passes)
        self.window_width = w
        self.window_height = h

        if use_tiles:
            print("TILE RENDERING ENABLED")
            self.render = self.effect.render_tiles
        else:
            print("TILE RENDERING DISABLED")
            self.render = self.effect.render_no_tiles

    def keyboard_func(self, window, key, scancode, action, mods):
        """Store the latest *action* for *key* under ``chr(key)``.

        Negative key codes (GLFW reports unknown keys as -1) are ignored,
        because ``chr`` raises ValueError for them.
        """
        if key < 0:
            return
        self.keys[chr(key)] = action

    def display(self):
        """Render one frame, resetting the frame counter while ``R`` is held."""
        if self.keys['R']:
            # The counter lives on the effect; assigning self.iframe only
            # created an unused attribute on the window and never reset it.
            self.effect.iframe = 0

        self.render(self.window_width, self.window_height)

    def run(self):
        """Render, swap and poll events until the window is closed."""
        window = self.window

        while not glfw.window_should_close(window):
            self.display()
            glfw.swap_buffers(window)
            glfw.poll_events()

        glfw.terminate()

    def reshape(self, window, w, h):
        """Record the new window size and reset the viewport to it."""
        glViewport(0, 0, w, h)
        self.window_width = w
        self.window_height = h


if __name__ == '__main__':
    # Window size, tiling mode and the effect to run.
    params = {
        "w": 320,
        "h": 240,
        "use_tiles": True,
        "num_tiles_x": 2,
        "num_tiles_y": 2,
        "passes": SIMPLE
    }
    # Toggle to choose the windowing back end.
    use_glut = True
    if use_glut:
        WindowGlut(**params).run()
    else:
        WindowGlfw(**params).run()

To run this code you'll need to install numpy, pyopengl, glfw and PyGLM. You can switch between glfw and glut by toggling the variable use_glut. I've added this option because running glut on macOS can be tricky in certain cases.

Anyway, the goal of this thread is to figure out how to fix the buggy snippet so that it does proper tile rendering; as you can see, right now only a very naive attempt is implemented.

In the main block you can specify if you want to use a render method using tiles or not (use_tiles variable), if you choose using tiles you'll need to specify the number of them (num_tiles_x, num_tiles_y).

Cases:

If you run it with "use_tiles": False you'll see this output:

that output is correct
If you run it with "use_tiles": True, "num_tiles_x": 2, "num_tiles_y": 2 you should see the same output as in case 1). Also correct
But if you run it with "use_tiles": True, "num_tiles_x": 4, "num_tiles_y": 4 or higher you'll start seeing a totally screwed up image like below:

QUESTION: What's the bug of my tile rendering code that's producing the wrong output? How would you fix it?

Also... even once the code is fixed, the way I'm trying to do tile rendering is quite naive, and it won't work very well with more complex effects where passes need to read back from adjacent tiles or, even worse, non-adjacent tiles. For the case of adjacent tiles I've been told that adding some padding to the tiles would work pretty well, but for the more general case I don't have a clue how you'd tackle that problem. In any case, one step at a time: the goal of this thread is to fix the buggy snippet.

Solution

In the first pass a single tile is rendered to a framebuffer which has exactly the size of the tile. gl_FragCoord.xy is (0,0) at the bottom left of the tile. uv = (0,0) has to be at the bottom left of the window and uv = (1, 1) at the top right of the window. To calculate the uv coordinate with respect to the window, you have to add the offset of the tile to gl_FragCoord.xy and divide by the size of the window:

formula (pseudo code):

uv = (gl_FragCoord.xy + (offset_x*x, offset_y*y)) / (window_width, window_height)


    +------------------+
    |                  |
    |    +----+        |
    |    |    |        |
    |    +----+        |
    |  (0,0) tile = gl_FragCoord.xy
    |                  |
    +------------------+
 (0,0) window

In the first pass, iResolution has to be (window_width, window_height) and iOffset has to be (offset_x * x, offset_y * y).

# Pass0: BufferA - Channels [BufferA, None, None, None]
# Excerpt meant to replace the Pass0 loop inside render_tiles.
for y in range(N):
    for x in range(M):
        fbo0 = self.fbos[0][y * M + x]
        fbo1 = self.fbos[1][y * M + x]
        mvp = proj * self.view * self.model
        rp = self.passes[0]
        fbo0.bind()

        # The render target is the tile-sized FBO, so the viewport is the tile.
        glViewport(0, 0, self.tiler.tile_width, self.tiler.tile_height)

        # iResolution is the window size and iOffset the tile's position in
        # the window, so uv spans the whole window across all tiles.
        w, h   = window_width, window_height
        aspect = window_width / window_height
        self.render_pass(rp, mvp, w, h, fbo1.colors[0], offset_x * x, offset_y * y)

In the second pass a single tile is read from the texture and rendered to the window (default framebuffer 0). The source texture (tile) has exactly the size of a tile and the uv coordinate has to be calculated with respect to the tile texture. gl_FragCoord.xy is (0,0) at the bottom left of the window. uv = (0,0) has to be at the bottom left of the tile and uv = (1, 1) at the top right of the tile. To calculate the uv coordinate, the offset of the tile has to be subtracted from gl_FragCoord.xy and the result has to be divided by the size of a tile:

formula (pseudo code)

uv = (gl_FragCoord.xy - (offset_x*x, offset_y*y)) / (tile_width, tile_height)


    +------------------+
    |                  |
    |    +----+        |
    |    |    |        |
    |    +----+        |
    |  (0,0) tile      |
    |                  |
    +------------------+
  (0,0) window = gl_FragCoord.xy

In the 2nd pass, iResolution has to be (self.tiler.tile_width, self.tiler.tile_height) and iOffset has to be (-offset_x * x, -offset_y * y).

# Pass1: Image - Channels [BufferA, None, None, None]
# Excerpt meant to replace the Pass1 loop inside render_tiles.
glBindFramebuffer(GL_FRAMEBUFFER, 0)
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

for y in range(N):
    for x in range(M):
        fbo0 = self.fbos[0][y * M + x]
        fbo1 = self.fbos[1][y * M + x]
        mvp = proj * self.view * self.model
        rp = self.passes[1]

        # Draw into the tile's rectangle of the window.
        glViewport(offset_x*x, offset_y*y, self.tiler.tile_width, self.tiler.tile_height)

        # iResolution is the tile size and iOffset the negated tile position,
        # so uv spans exactly the tile texture being sampled.
        w, h   = self.tiler.tile_width, self.tiler.tile_height
        aspect = self.tiler.tile_width / self.tiler.tile_height
        self.render_pass(rp, mvp, w, h, fbo0.colors[0], -offset_x * x, -offset_y * y)

Edit for mcve.py

In this case the render target is always a framebuffer with the size of a tile. The 2nd render pass ("Pass1") reads from a tile and stores to the destination tile, so the 2nd pass has to be:

# Pass1: Image - Channels [BufferA, None, None, None]
# Variant for the case where the destination is a tile-sized FBO too.
# NOTE(review): self.fbo_target is not defined in the listing shown in this
# document; presumably it comes from mcve.py - confirm.
for y in range(N):
    for x in range(M):
        fbo_dst = self.fbo_target[0][y * M + x]
        fbo_src = self.fbos[0][y * M + x]
        mvp = proj * self.view * self.model
        rp = self.passes[1]

        fbo_dst.bind()
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        glViewport(0, 0, self.tiler.tile_width, self.tiler.tile_height)

        # Source and destination are both tile-sized, so no offset is needed.
        w, h   = self.tiler.tile_width, self.tiler.tile_height
        aspect = self.tiler.tile_width / self.tiler.tile_height
        self.render_pass(rp, mvp, w, h, fbo_src.colors[0], 0, 0)

A further issue is the reading of the texture from the previous frame in the fragment shader. The size of the texture is always the size of a tile. The bottom left coordinate of the texture is (0, 0) and the top right coordinate is (1, 1). So for the calculation of the texture coordinate (st), the offset has to be skipped and the resolution is given by the size of the texture (textureSize):

void mainImage( out vec4 fragColor, in vec2 fragCoord ) {
    initSpheres();

    // issue is here: the previous frame lives in a tile-sized texture, so the
    // lookup coordinate is the un-offset gl_FragCoord divided by the texture size
    // vec2 st = fragCoord.xy / iResolution.xy; <--- delete
    vec2 st = gl_FragCoord.xy / vec2(textureSize(iChannel0, 0));

    // [...]

    // Moving average (multipass code)
    vec3 color = texture(iChannel0, st).rgb * float(iFrame);
    // [...]
}

See the result:

If you don't want to change the shader code in mainImage, then a different approach is to trick the system and to delegate the texture look up to a different function, by a macro. e.g.:

def shader(tileTextureLookup, text):
    """Wrap a Shadertoy-style ``mainImage`` body into a full fragment shader.

    When *tileTextureLookup* is true, a ``textureTile`` helper is inserted
    and ``texture`` is redefined to it by a macro, so lookups written
    against window-relative uv are remapped with iResolution, iOffset and
    the sampler's texture size.
    """
    declarations = textwrap.dedent("""\
        uniform float iTime;
        uniform int iFrame;
        uniform vec3 iResolution;
        uniform sampler2D iChannel0;
        uniform vec2 iOffset;
        out vec4 frag_color;
    """)

    if tileTextureLookup:
        tile_lookup = textwrap.dedent("""\
            vec4 textureTile(sampler2D sampler, vec2 uv) {
                vec2 st = (uv * iResolution.xy - iOffset.xy) / vec2(textureSize(sampler, 0));
                return texture(sampler, st); 
            }

            #define texture textureTile
        """)
    else:
        tile_lookup = ""

    entry_point = textwrap.dedent("""\
        void main() {
            mainImage(frag_color, gl_FragCoord.xy + iOffset);
        }
     """)

    user_code = textwrap.dedent(text)
    return GLSL_VERSION + declarations + tile_lookup + user_code + entry_point

# Two-pass effect: the first pass is built with the tile-aware texture lookup
# enabled, the second (final image) pass with it disabled.
SMALLPT_MULTIPASS = [
    shader(True, """\
        // All code here is by Zavie (https://www.shadertoy.com/view/4sfGDB#)

        // [...]

    """),
    shader(False, """\
        // A simple port of Zavie's GLSL smallpt that uses multipass.
        // Original source: https://www.shadertoy.com/view/4sfGDB#

        void mainImage( out vec4 fragColor, in vec2 fragCoord ) {
            vec2 uv = fragCoord.xy / iResolution.xy;
            vec3 color = texture(iChannel0, uv).rgb;

            fragColor = vec4(pow(clamp(color, 0., 1.), vec3(1./2.2)), 1.);
        }
    """)
]

But note, texture is an overloaded function and this approach works for 2-dimensional textures only. Furthermore, there are other lookup functions, like texelFetch, too.