Search code examples
javaexcelxmlapache-poi

How to remove a XSSFSingleXmlCell with Apache POI 5.2.3?


Similarly to this post, How to read/write XML maps from/in excel with Apache POI in Java? I want to read/write XML maps from/to Excel with Apache POI. I am able to create multiple XML maps, but now I want to remove them.

Here I have an example with two XML maps: Two XML maps

Removing the schema and map is not the problem, but I have the requirement that specific user roles may drag the XML Map elements to an Excel file and save the file. Thus, I want to be able to remove the schema, map and related XML cells for a single data structure.

I am able to remove the related tables (XSSFTable), but I could not find out how to remove the single XML cells (XSSFSingleXmlCell).

In the below code I tried to delete the associated POIXMLDocumentPart with reflection similar to this post How to remove all formulas from workbook with Java POI. In principle, I am able to delete XML cells with the removeRelation in the code below, but the affected POIXMLDocumentPart object contains all of the single XML cells in the Excel file. This would also delete the related cells from the other XML map.

The code using Apache POI 5.2.3:

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.xssf.model.MapInfo;
import org.apache.poi.xssf.model.SingleXmlCells;
import org.apache.poi.xssf.usermodel.XSSFMap;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFTable;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xssf.usermodel.helpers.XSSFSingleXmlCell;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMap;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMapInfo;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSchema;

import lombok.extern.log4j.Log4j2;

@Log4j2
public class RemoveXmlMap {

    public static boolean removeXmlZuordnung(XSSFWorkbook workbook, MapInfo mapInfo, String schemaId) {
        if (removeSchema(mapInfo, schemaId)) {
            return removeMap(workbook, mapInfo, schemaId);
        }
        return false;
    }

    private static boolean removeSchema(MapInfo mapInfo, String schemaId) {
        CTMapInfo ctMapInfo = mapInfo.getCTMapInfo();
        int index = -1;
        List<CTSchema> schemas = ctMapInfo.getSchemaList();
        for (int i = 0; i < schemas.size(); i++) {
            if (schemaId.equals(schemas.get(i).getID())) {
                index = i;
                break;
            }
        }
        if (index < 0) {
            return false;
        }
        ctMapInfo.removeSchema(index);
        return true;
    }

    private static boolean removeMap(XSSFWorkbook workbook, MapInfo mapInfo, String schemaId) {
        CTMapInfo ctMapInfo = mapInfo.getCTMapInfo();
        int index = -1;
        List<CTMap> ctMaps = ctMapInfo.getMapList();
        for (int i = 0; i < ctMaps.size(); i++) {
            if (schemaId.equals(ctMaps.get(i).getSchemaID())) {
                index = i;
                break;
            }
        }
        if (index < 0) {
            return false;
        }
        CTMap ctMap = ctMapInfo.getMapArray(index);
        XSSFMap xssfMap = new XSSFMap(ctMap, mapInfo);
        removeXmlMappingsForMap(workbook, mapInfo, xssfMap);
        ctMapInfo.removeMap(index);
        return true;
    }

    private static boolean removeXmlMappingsForMap(XSSFWorkbook workbook, MapInfo mapInfo, XSSFMap xssfMap) {
        if (!removeXmlMappingsForSingleCells(workbook, mapInfo, xssfMap)) {
            return false;
        }
        return removeXmlMappingsForTables(workbook, xssfMap);
    }

    private static boolean removeXmlMappingsForSingleCells(XSSFWorkbook workbook, MapInfo mapInfo, XSSFMap xssfMap) {
        Set<POIXMLDocumentPart> documentPartForSingleXmlCells = getPoiXmlDocumentPartForSingleCells(mapInfo, xssfMap.getCtMap());
        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
            for (POIXMLDocumentPart documentPart : documentPartForSingleXmlCells) {
                if (documentPart instanceof SingleXmlCells singleXMLCells) {
                    XSSFSheet sheet = workbook.getSheetAt(i);
                    List<XSSFSingleXmlCell> xssfSingleXmlCells = singleXMLCells.getAllSimpleXmlCell();
                    if (StringUtils.isNotBlank(sheet.getRelationId(documentPart)) && !xssfSingleXmlCells.isEmpty()) {
                        if (!removeRelation(sheet, documentPart)) {
                            return false;
                        }
                    }
                }
            }
        }
        return true;
    }

    private static boolean removeXmlMappingsForTables(XSSFWorkbook workbook, XSSFMap xssfMap) {
        for (XSSFTable table : xssfMap.getRelatedTables()) {
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                XSSFSheet sheet = workbook.getSheetAt(i);
                if (sheet.getTables().contains(table)) {
                    sheet.removeTable(table);
                    if (sheet.getTables().contains(table)) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    // Based on XSSFMap.getRelatedSingleXMLCell
    private static Set<POIXMLDocumentPart> getPoiXmlDocumentPartForSingleCells(MapInfo mapInfo, CTMap ctMap) {
        Set<POIXMLDocumentPart> poiXmlDocumentParts = new HashSet<>();

        int sheetNumber = mapInfo.getWorkbook().getNumberOfSheets();
        for (int i = 0; i < sheetNumber; i++) {
            XSSFSheet sheet = mapInfo.getWorkbook().getSheetAt(i);
            for (POIXMLDocumentPart p : sheet.getRelations()) {
                if (p instanceof SingleXmlCells singleXMLCells) {
                    for (XSSFSingleXmlCell cell : singleXMLCells.getAllSimpleXmlCell()) {
                        if (cell.getMapId() == ctMap.getID()) {
                            poiXmlDocumentParts.add(p);
                        }
                    }
                }
            }
        }
        return poiXmlDocumentParts;
    }

    private static boolean removeRelation(XSSFSheet sheet, POIXMLDocumentPart poiXmlDocumentPart) {
        try {
            Method removeRelation = POIXMLDocumentPart.class.getDeclaredMethod("removeRelation", POIXMLDocumentPart.class, boolean.class);
            removeRelation.setAccessible(true);
            Boolean success = (Boolean) removeRelation.invoke(sheet, poiXmlDocumentPart, true);
            if (Boolean.TRUE.equals(success)) {
                return true;
            }
        } catch (NoSuchMethodException | SecurityException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
            log.error("Could not execute method 'removeRelation'!");
        }
        return false;
    }
}

Is there any way to delete single XML cells without affecting other XML maps?


Solution

  • How to deal with such kinds of requirements?

    All Office Open XML files, as *.xlsx too, are ZIP archives. So one can simply unzip them and have a look into the structure. Doing so for a *.xlsx file having XML maps to single cells, one will find /xl/tables/tableSingleCells[n].xml for each sheet having single cells mapped to XML. The root element of the XML in those files is singleXmlCells. In terms of the Office Open XML file structure, there is a document part /xl/tables/tableSingleCells[n].xml having XML to store the XML mapping for single cells.

    Is Apache POI supporting this kind of document part?

    In Apache POI 5.2.3, there is SingleXmlCells, which is exactly that document part. But there is not any getter to get objects of that class already. But each document part extends org.apache.poi.ooxml.POIXMLDocumentPart which has a method getRelations() to get all related parts. So one can get the SingleXmlCells from XSSFSheet by looping through all related parts of given XSSFSheet until one finds instance of SingleXmlCells.

    Example:

    import org.apache.poi.ss.usermodel.WorkbookFactory;
    import org.apache.poi.xssf.model.SingleXmlCells;
    import org.apache.poi.xssf.usermodel.*;
    
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    
    class ExcelRemoveCellFromSingleXmlCells {
            
     public static void main(String[] args) throws Exception {
      try (
           XSSFWorkbook workbook = (XSSFWorkbook)WorkbookFactory.create(new FileInputStream("./ExcelWithXMLMapSingleCells.xlsx"));
           FileOutputStream out = new FileOutputStream("./ExcelWithXMLMapSingleCellsNew.xlsx")
           ) {
               
       XSSFSheet sheet = workbook.cloneSheet(0);
       for (org.apache.poi.ooxml.POIXMLDocumentPart relatedPart : sheet.getRelations()) {
        System.out.println(relatedPart.getClass());
        if (relatedPart instanceof org.apache.poi.xssf.model.SingleXmlCells) {
         SingleXmlCells singleXmlCellsTable = (SingleXmlCells)relatedPart;
         org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSingleXmlCells ctSingleXmlCells = singleXmlCellsTable.getCTSingleXMLCells();
         System.out.println(ctSingleXmlCells);
        }
       }
       
       workbook.write(out);
      } 
     }
    }
    

    Does SingleXmlCells support removing single XSSFSingleXmlCells?

    No. But it supports getting the XML object org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSingleXmlCells using method getCTSingleXMLCells().

    What does that CTSingleXmlCells support?

    There is no API documentation for org.openxmlformats.schemas.* classes public available. But one can download the sources of poi-ooxml-full-5.2.3 (for example from here: https://repo1.maven.org/maven2/org/apache/poi/poi-ooxml-full/5.2.3/). Then one can do javadoc from those sources to create a API documentation.

    There you will find the method removeSingleXmlCell​(int i) in CTSingleXmlCells, which "Removes the ith "singleXmlCell" element".