Search code examples
javaexcelapache-poiexcel-charts

How to extract advanced chart info from Excel using Apache POI?


Is there a way to use Apache POI to extract the following information (preferably all the info):

  1. title (including position + style)
  2. axes (including position + style)
  3. series/ data ranges (including position + style)
  4. legend (including position + style)

for the following charts:

  1. High low close chart
  2. open high low close chart
  3. volume high low close chart
  4. volume open high low close chart
  5. box and whisker chart
  6. funnel chart
  7. paretoline chart
  8. region map chart
  9. sunburst chart
  10. treemap chart
  11. waterfall

Solution

  • All your listed charts seems to be of extended chart type which is not available in versions of Office Open XML up to year 2007. But those old versions of Office Open XML is what Apache POI is developed on.

    The extended chart is not a XSSFChart. XSSFChart is of type application/vnd.openxmlformats-officedocument.drawingml.chart+xml while extended chart is of type application/vnd.ms-office.chartex+xml.

    But we can using at least parts of apache poi and have to program the XSSFChartEx class our own instead the XSSFChart then. Unfortunately also a class XSSFChartExRelation is needed because such a relation class of course also not exists already.

    In XSSFChartEx class we can have getters for needed parts as title, series, legend, axes then. Simplest will be to get the XML of the parts as XmlObject.

    Complete example:

    import java.io.IOException;
    import java.io.OutputStream;
    import java.io.FileInputStream;
    
    import org.apache.poi.xssf.usermodel.XSSFWorkbook;
    import org.apache.poi.xssf.usermodel.XSSFSheet;
    import org.apache.poi.xssf.usermodel.XSSFDrawing;
    
    import org.apache.poi.ooxml.POIXMLDocumentPart;
    import org.apache.poi.ooxml.POIXMLRelation;
    import org.apache.poi.openxml4j.opc.PackagePart;
    
    import org.apache.xmlbeans.XmlObject;
    
    import java.util.List;
    import java.util.ArrayList;
    
    public class GetChartExXML {
                
     private static List<XSSFChartEx> getChartExList(XSSFSheet sheet) {
      List<XSSFChartEx> resultList = new ArrayList<XSSFChartEx>();
      try {
       XSSFDrawing drawing = sheet.getDrawingPatriarch();
       if (drawing != null) {
        for (POIXMLDocumentPart dpart : drawing.getRelations()) {
         PackagePart ppart = dpart.getPackagePart();
         if ("application/vnd.ms-office.chartex+xml".equals(ppart.getContentType())) {
          XSSFChartEx xssfChartEx = new XSSFChartEx(ppart);
          String rId = drawing.getRelationId(dpart);
          drawing.addRelation(
           rId, 
           new XSSFChartExRelation(
            "application/vnd.ms-office.chartex+xml",
            "http://schemas.microsoft.com/office/2014/relationships/chartEx",
            "/xl/charts/chartEx#.xml"),
           xssfChartEx
          );
          resultList.add(xssfChartEx);
         }
        }
       }
      } catch (Exception e) {
       e.printStackTrace();
      }
      return resultList;
     } 
    
     public static void main(String[] args) {
      try {
       XSSFWorkbook workbook = new XSSFWorkbook(new FileInputStream("ChartExample.xlsx"));
       XSSFSheet sheet = workbook.getSheetAt(0);
       System.out.println("Loaded sheet is " + sheet.getSheetName());
       List<XSSFChartEx> chartExList = getChartExList(sheet);
       System.out.println(chartExList);
       for(XSSFChartEx xssfChartEx : chartExList) {
        //System.out.println(xssfChartEx.getChartExXmlObject());
        System.out.println(xssfChartEx.getTitle());
        System.out.println(xssfChartEx.getSeriesCount());
        System.out.println(xssfChartEx.getSeries(0));
        System.out.println(xssfChartEx.getLegend());
        System.out.println(xssfChartEx.getAxesCount());
        System.out.println(xssfChartEx.getAxis(0));
       }
      } catch (Exception e) {
       e.printStackTrace();
      }
     }
    
     private static class XSSFChartEx extends POIXMLDocumentPart {
    
      private XmlObject chartExXmlObject;
    
      private XSSFChartEx(PackagePart part) throws Exception {
       super(part);
       chartExXmlObject = XmlObject.Factory.parse(part.getInputStream());
      }
    
      private XmlObject getChartExXmlObject() {
       return chartExXmlObject;
      }
    
      private int getSeriesCount() {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:plotArea/cx:plotAreaRegion/cx:series"
       );
       return result.length;
      }
      
      private XmlObject getSeries(int number) {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:plotArea/cx:plotAreaRegion/cx:series"
       );
       if (result.length > number) return result[number];
       return null;
      }
    
      private int getAxesCount() {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:plotArea/cx:axis"
       );
       return result.length;
      }
      
      private XmlObject getAxis(int number) {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:plotArea/cx:axis"
       );
       if (result.length > number) return result[number];
       return null;
      }
    
      private XmlObject getTitle() {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:title"
       );
       if (result.length > 0) return result[0];
       return null;
      }
      
      private XmlObject getLegend() {
       XmlObject[] result = chartExXmlObject.selectPath(
        "declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
        ".//cx:chart/cx:legend"
       );
       if (result.length > 0) return result[0];
       return null;
      }
    
      @Override
      protected void commit() throws IOException {
       PackagePart part = getPackagePart();
       OutputStream out = part.getOutputStream();
       chartExXmlObject.save(out);
       out.close();
      }
     }
    
     private static class XSSFChartExRelation extends POIXMLRelation {
      private XSSFChartExRelation(String type, String rel, String defaultName) {
       super(type, rel, defaultName);
      }
     }
    }
    

    The real challenge is to find out the meaning of the found XML. Fortunately most of the XML element names are self-explanatory. So one should be able to find out where is the title text for example. But to get the meaning of series settings, legend settings and axes settings can be challenging.