Search code examples
pythonhdf5pyqtgraph

Loading a hdf5 file and displaying the data with pyqtgraph


I would like to display the data from an HDF5 file using the ImageView() class from pyqtgraph. The bare-bones code for showing an (empty) ImageView() window is:

from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph as pg

# Interpret image data as row-major instead of col-major
# NOTE(review): leftButtonPan=False presumably switches left-drag from panning
# to rectangle zoom -- confirm against the pyqtgraph config options.
pg.setConfigOptions(leftButtonPan = False, imageAxisOrder='row-major')

# A QApplication must exist before any widget is created.
app = QtGui.QApplication([])

## Create window with ImageView widget
# No image is assigned to `imv` in this script, so the view opens empty.
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

# Enter the Qt event loop, except in an interactive session where QtCore
# exposes PYQT_VERSION.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

There is, however, also an HDF5 example in the pyqtgraph example set. Unfortunately, I have not been able to get it to work. I made some alterations to the example to fit my needs, but I am getting an error. First, here is the code:

import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui

# Create the Qt application object before any widgets are built.
pg.mkQApp()

# Open a plot window, turn auto-ranging off and set the initial visible
# x-range to 0..500.
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)


class HDF5Plot(pg.ImageItem):
    """Graphics item that re-reads the visible slice of an HDF5 dataset.

    Every time the view range changes, the x-range of the enclosing ViewBox
    is converted to a [start, stop) index range, that slice is read from
    ``self.hdf5`` and, if it holds more than ``self.limit`` samples, reduced
    to an interleaved min/max envelope before being handed to ``setData``.

    NOTE(review): with ``pg.ImageItem`` as the base class, the final
    ``self.setData(visible)`` call was reported to fail with
    ``TypeError: setData(self, int, Any): argument 1 has unexpected type
    'numpy.ndarray'``. The slicing/reshape logic below also only works for
    1-D data. Presumably the base class should be a curve item
    (e.g. ``pg.PlotCurveItem``) -- confirm.
    """

    def __init__(self, *args, **kwds):
        # Attributes are set before the base-class constructor runs.
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.ImageItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Attach the dataset to read from and refresh the displayed data."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Refresh the displayed data whenever the view range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read the currently visible index range and push it to ``setData``."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5
        # (one sample of margin on the left, two on the right, clipped to
        # [0, len(dataset)]).
        xrange = vb.viewRange()[0]
        start = max(0, int(xrange[0]) - 1)
        stop = min(len(self.hdf5), int(xrange[1] + 2))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1

        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            # Two output values (min and max) per group of `ds` input samples.
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0

            # read data in chunks of ~1M samples
            # (rounded down to a multiple of ds so groups never straddle chunks)
            chunkSize = (1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)

                # reshape chunk to be integral multiple of ds
                # (trailing samples that do not fill a whole group are dropped)
                chunk = chunk[:(len(chunk) // ds) * ds].reshape(len(chunk) // ds, ds)

                # compute max and min
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)

                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + chunk.shape[0] * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + chunk.shape[0] * 2:2] = chunkMax
                targetPtr += chunk.shape[0] * 2

            # Drop the unused tail of the pre-allocated buffer.
            visible = visible[:targetPtr]
            # Each group of ds samples produced two points, hence ds / 2 per point.
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling


# The file handle is never closed here, so the dataset stays readable while
# the item keeps requesting slices from it.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
# 'data' must be the name of an object stored in test.hdf5; if it is not,
# this lookup raises KeyError ("object 'data' doesn't exist").
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':

    import sys

    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

And here is the error:

Traceback (most recent call last):
  File "pyqtg.py", line 206, in <module>
    curve.setHDF5(f['data'])
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/h5py-3.3.0-py3.8-linux-x86_64.egg/h5py/_hl/group.py", line 305, in __getitem__
    oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5o.pyx", line 190, in h5py.h5o.open
KeyError: "Unable to open object (object 'data' doesn't exist)"

The problem is that I don't know how the HDF5 file is structured, so I am unsure whether I simply need to replace 'data' with the correct dataset name or whether a completely different approach is needed. Any help is greatly appreciated.

Edit 1: I got the examples from running python -m pyqtgraph.examples. Once the GUI pops up down the list you'll see "HDF5 Big Data". My code stems from that example. And from the examples the third one from the top, ImageView, is the code I would like to use to show the HDF5 file.

Edit 2: Here is the result of running the second part of kcw78's code: http://pastie.org/p/3scRyUm1ZFVJNMwTHQHCBv

Edit 3: So I ran the code above but made a small change with the help from kcw78. I changed:

# Original version: open the file and plot the object named 'data'.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

to:

# Changed version: plot the dataset named 'aggea' instead of 'data'.
# NOTE(review): the file is closed when this with-block exits, but `curve`
# keeps the dataset handle; reads triggered by later view changes would
# presumably fail -- confirm.
with h5py.File('test.hdf5', 'r') as h5f:
    curve = HDF5Plot()
    curve.setHDF5(h5f['aggea'])
    plt.addItem(curve)

And got the errors:

Traceback (most recent call last):
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
    self.parentChanged()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
    self._updateView()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
    self.viewRangeChanged()
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
    self.parentChanged()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
    self._updateView()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
    self.viewRangeChanged()
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'

Edit 4:

Here is a photo of the results: https://i.sstatic.net/XBXf9.jpg. I get the same empty results from both creating a 2d hdf5 file and using my 2d data file.

# Read the whole 'aggea' dataset, flatten it to 1-D and store it as dataset
# 'data' in a new file, then print the new dataset's shape and dtype.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)

Edit 5: The code that runs and plots

import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui

# Create the Qt application object before any widgets are built.
pg.mkQApp()

# Open a plot window, turn auto-ranging off and set the initial visible
# x-range to 0..500.
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)

class HDF5Plot(pg.PlotCurveItem):
    """Curve item that plots only the visible part of a large 1-D HDF5 dataset.

    Whenever the view range changes, the x-range of the enclosing ViewBox is
    mapped to an index range ``[start, stop)`` of the dataset. If that range
    holds more than ``limit`` samples it is reduced to an interleaved
    min/max envelope, so the number of plotted points stays bounded while
    peaks remain visible.

    Attributes:
        hdf5: The dataset to read from (anything supporting ``len``, 1-D
            slicing and ``dtype``), or ``None`` if nothing is attached yet.
        limit: Sample count above which the visible range is downsampled.
    """

    def __init__(self, *args, **kwds):
        # Set before the base constructor runs, in case it triggers an update.
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Attach the dataset to plot and refresh the curve."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Refresh the curve whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Load the currently visible index range and update the curve."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5: one sample of
        # margin on the left and two on the right.
        x_min, x_max = vb.viewRange()[0]
        n_total = len(self.hdf5)
        # Clamp to 0 <= start <= stop <= n_total. Without this, a view left
        # of 0 yields a negative `stop` (a slice that loads almost the whole
        # dataset), and a view far to the right of the data yields
        # stop < start, which can make `ds` zero and raise ZeroDivisionError.
        start = min(max(0, int(x_min) - 1), n_total)
        stop = max(start, min(n_total, int(x_max + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1

        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            visible = self._downsample(start, stop, ds)
            # Each group of ds samples yields two points (min and max).
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling

    def _downsample(self, start, stop, ds):
        """Return the min/max envelope of ``hdf5[start:stop]`` in groups of ``ds``.

        The data is read piecewise to limit memory usage. Samples at the end
        of a chunk that do not fill a complete group are dropped.
        """
        samples = 1 + ((stop - start) // ds)
        visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
        sourcePtr = start
        targetPtr = 0

        # Read data in chunks of ~1M samples, rounded down to a multiple of
        # ds. max(1, ...) keeps the chunk non-empty when ds exceeds 1M, which
        # would otherwise stall the loop below.
        chunkSize = max(1, 1000000 // ds) * ds
        while sourcePtr < stop - 1:
            chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
            sourcePtr += len(chunk)

            # reshape chunk to be integral multiple of ds
            groups = len(chunk) // ds
            chunk = chunk[:groups * ds].reshape(groups, ds)

            # interleave min and max into plot data to preserve envelope shape
            visible[targetPtr:targetPtr + groups * 2:2] = chunk.min(axis=1)
            visible[1 + targetPtr:1 + targetPtr + groups * 2:2] = chunk.max(axis=1)
            targetPtr += groups * 2

        # Drop the unused tail of the pre-allocated buffer.
        return visible[:targetPtr]


# Step 1: flatten the 'aggea' dataset and store it as a 1-D dataset named
# 'data'. Both files are closed as soon as the copy has been written.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)

# Step 2: reopen the 1-D file read-only and keep it open. HDF5Plot reads
# slices lazily on every view change, so the dataset handle must stay valid
# for as long as the window is in use; handing it over inside a with-block
# would close the file before the event loop even starts.
h5f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(h5f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':

    import sys

    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        try:
            QtGui.QApplication.instance().exec_()
        finally:
            # The GUI is gone once the event loop returns; release the file.
            h5f.close()

Edit 6: What worked in the end:

from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
import matplotlib.pyplot as plt

# A QApplication must exist before any widget is created.
app = QtGui.QApplication([])

# Main window with an ImageView as its central widget.
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

with h5py.File('test.hdf5', 'r') as h5fr:
    # Indexing the dataset with `[()]` reads its values out of the file, so
    # `data` is handed to the ImageView while the file is still open.
    # NOTE(review): `dataset.value` was the older spelling of `[()]`; it is
    # believed to have been removed in h5py 3.x -- confirm before using it.
    data = h5fr.get('aggea')[()]
    imv.setImage(data)

    # Alternative way to load a dataset and inspect its shape:
    # hf = h5py.File('test.hdf5', 'r')
    # n1 = np.array(hf['/pathtodata'][:])
    # print(n1.shape)
## Set a custom color map
# Six RGB stops, placed at evenly spaced positions between 0.0 and 1.0.
colors = [
    (0, 0, 0),
    (45, 5, 61),
    (84, 42, 55),
    (150, 87, 60),
    (208, 171, 141),
    (255, 255, 255)
]
cmap = pg.ColorMap(pos=np.linspace(0.0, 1.0, 6), color=colors)
imv.setColorMap(cmap)

## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

Solution

  • This is a simple example that shows how to extract data (as a 2-D array) from an HDF5 file and plot it with pyqtgraph. It also creates plots from NumPy arrays for comparison purposes -- the two methods are nearly identical.

    • For the first 2 plots, the x= values are generated with np.arange and the y= values are "random" NumPy array data.
    • The x= and y= values are read from an HDF5 file for the second pair of plots.

    Only minor changes are required to work with your data. Changes required: 1) the HDF5 filename, and 2) the dataset names. You will have to figure out how to reshape your data from shape=(1038,1388) to appropriately shaped arrays for the X and Y data.

    Code below:

    from pyqtgraph.Qt import QtGui, QtCore
    import numpy as np
    import h5py
    import pyqtgraph as pg
    
    # Build a small demo file: one dataset named 'data' of shape (100, 2),
    # where column 0 holds the X points and column 1 the Y points.
    with h5py.File('plot_2d_data.h5','w') as h5f:
        data = h5f.create_dataset('data',shape=(100,2))
        data[:,0] = np.arange(0.0,10.0,0.1)
        data[:,1] = np.random.normal(size=100)

    app = QtGui.QApplication([])

    # One layout widget hosts all four plots in a 2 x 2 grid.
    win = pg.GraphicsLayoutWidget(show=True, title="2-D plot examples")
    win.resize(1000,600)
    win.setWindowTitle('pyqtgraph example: 2D Plotting')

    # Antialiasing gives smoother-looking curves.
    pg.setConfigOptions(antialias=True)

    # Top row: data generated directly with NumPy.
    p1 = win.addPlot(
        title="Plot of NumPy data",
        x=np.arange(0.0,10.0,0.1),
        y=np.random.normal(size=100),
    )

    p2 = win.addPlot(
        title="NumPy data with Points",
        x=np.arange(0.0,10.0,0.1),
        y=np.random.normal(size=100),
        pen=(255,0,0),
        symbolBrush=(255,0,0),
    )

    win.nextRow()

    # Bottom row: the same kind of plots, with X and Y sliced out of the
    # HDF5 dataset while the file is open.
    with h5py.File('plot_2d_data.h5','r') as h5f:
        xy_pairs = h5f['data']

        p3 = win.addPlot(
            title="Plot of HDF5 data",
            x=xy_pairs[:,0],
            y=xy_pairs[:,1],
        )

        p4 = win.addPlot(
            title="HDF5 data with Points",
            x=xy_pairs[:,0],
            y=xy_pairs[:,1],
            pen=(0,0,255),
            symbolBrush=(0,0,255),
        )

    ## Start Qt event loop unless running in interactive mode or using pyside.
    if __name__ == '__main__':
        import sys
        interactive_pyqt = (sys.flags.interactive == 1) and hasattr(QtCore, 'PYQT_VERSION')
        if not interactive_pyqt:
            QtGui.QApplication.instance().exec_()