Is there a way to use Apache POI to extract the following information (preferably all the info):
for the following charts:
All your listed charts seems to be of extended chart type which is not available in versions of Office Open XML up to year 2007. But those old versions of Office Open XML is what Apache POI is developed on.
The extended chart is not a XSSFChart
. XSSFChart
is of type application/vnd.openxmlformats-officedocument.drawingml.chart+xml
while extended chart is of type application/vnd.ms-office.chartex+xml
.
But we can using at least parts of apache poi and have to program the XSSFChartEx
class our own instead the XSSFChart
then. Unfortunately also a class XSSFChartExRelation
is needed because such a relation class of course also not exists already.
In XSSFChartEx
class we can have getters for needed parts as title, series, legend, axes then. Simplest will be to get the XML of the parts as XmlObject
.
Complete example:
import java.io.IOException;
import java.io.OutputStream;
import java.io.FileInputStream;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.ooxml.POIXMLRelation;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.xmlbeans.XmlObject;
import java.util.List;
import java.util.ArrayList;
public class GetChartExXML {
private static List<XSSFChartEx> getChartExList(XSSFSheet sheet) {
List<XSSFChartEx> resultList = new ArrayList<XSSFChartEx>();
try {
XSSFDrawing drawing = sheet.getDrawingPatriarch();
if (drawing != null) {
for (POIXMLDocumentPart dpart : drawing.getRelations()) {
PackagePart ppart = dpart.getPackagePart();
if ("application/vnd.ms-office.chartex+xml".equals(ppart.getContentType())) {
XSSFChartEx xssfChartEx = new XSSFChartEx(ppart);
String rId = drawing.getRelationId(dpart);
drawing.addRelation(
rId,
new XSSFChartExRelation(
"application/vnd.ms-office.chartex+xml",
"http://schemas.microsoft.com/office/2014/relationships/chartEx",
"/xl/charts/chartEx#.xml"),
xssfChartEx
);
resultList.add(xssfChartEx);
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
return resultList;
}
public static void main(String[] args) {
try {
XSSFWorkbook workbook = new XSSFWorkbook(new FileInputStream("ChartExample.xlsx"));
XSSFSheet sheet = workbook.getSheetAt(0);
System.out.println("Loaded sheet is " + sheet.getSheetName());
List<XSSFChartEx> chartExList = getChartExList(sheet);
System.out.println(chartExList);
for(XSSFChartEx xssfChartEx : chartExList) {
//System.out.println(xssfChartEx.getChartExXmlObject());
System.out.println(xssfChartEx.getTitle());
System.out.println(xssfChartEx.getSeriesCount());
System.out.println(xssfChartEx.getSeries(0));
System.out.println(xssfChartEx.getLegend());
System.out.println(xssfChartEx.getAxesCount());
System.out.println(xssfChartEx.getAxis(0));
}
} catch (Exception e) {
e.printStackTrace();
}
}
private static class XSSFChartEx extends POIXMLDocumentPart {
private XmlObject chartExXmlObject;
private XSSFChartEx(PackagePart part) throws Exception {
super(part);
chartExXmlObject = XmlObject.Factory.parse(part.getInputStream());
}
private XmlObject getChartExXmlObject() {
return chartExXmlObject;
}
private int getSeriesCount() {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:plotArea/cx:plotAreaRegion/cx:series"
);
return result.length;
}
private XmlObject getSeries(int number) {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:plotArea/cx:plotAreaRegion/cx:series"
);
if (result.length > number) return result[number];
return null;
}
private int getAxesCount() {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:plotArea/cx:axis"
);
return result.length;
}
private XmlObject getAxis(int number) {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:plotArea/cx:axis"
);
if (result.length > number) return result[number];
return null;
}
private XmlObject getTitle() {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:title"
);
if (result.length > 0) return result[0];
return null;
}
private XmlObject getLegend() {
XmlObject[] result = chartExXmlObject.selectPath(
"declare namespace cx='http://schemas.microsoft.com/office/drawing/2014/chartex' " +
".//cx:chart/cx:legend"
);
if (result.length > 0) return result[0];
return null;
}
@Override
protected void commit() throws IOException {
PackagePart part = getPackagePart();
OutputStream out = part.getOutputStream();
chartExXmlObject.save(out);
out.close();
}
}
private static class XSSFChartExRelation extends POIXMLRelation {
private XSSFChartExRelation(String type, String rel, String defaultName) {
super(type, rel, defaultName);
}
}
}
The real challenge is to find out the meaning of the found XML. Fortunately most of the XML element names are self-explanatory. So one should be able to find out where is the title text for example. But to get the meaning of series settings, legend settings and axes settings can be challenging.