Search code examples
java pentaho-data-integration

How to completely remove specific input fields in User Defined Java Class in Pentaho


I don't understand how to completely remove specific input fields when using the User Defined Java Class in Pentaho Data Integration.

Let's assume that I have input fields A, B and C. Let's say I want to concatenate the values in B and C (separated by a space), write the result to C, and keep only fields A and C, dropping field B entirely (the real problem is much more complicated). I understand how to write the result to field C, but I don't know how to completely delete field B.

// Field names, read from the OUT1, OUT2 and REMOVE parameters in processMetadata().
private String outFieldName1;
private String outFieldName2;
private String removeFieldName;

// Positions of the fields above in the input row metadata (resolved in processMetadata()).
private int outFieldIndex1;
private int outFieldIndex2;
private int removeFieldIndex;

// Row most recently returned by getRow(); processRow() treats null as end of input.
private Object[] inputRow;

// Field counts of the input and output row metadata, captured in processMetadata().
private int inputRowMetaSize;
private int outputRowMetaSize;

/**
 * Handles one incoming row: joins the values of the REMOVE and OUT2 fields
 * with a single space and hands the result to pushOutputRow().
 *
 * @param smi step metadata passed in by the caller (not used in this method)
 * @param sdi step data passed in by the caller (not used in this method)
 * @return {@code false} when getRow() returns no row, {@code true} otherwise
 * @throws KettleException if reading the row or its fields fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
{
    inputRow = getRow();
    boolean endOfInput = (inputRow == null);
    if (endOfInput) {
        setOutputDone();
        return false;
    }

    // Parameter and index lookup happens once, on the first row only.
    if (first) {
        processMetadata();
    }

    String removedFieldValue = get(Fields.In, removeFieldName).getString(inputRow);
    String keptFieldValue = get(Fields.In, outFieldName2).getString(inputRow);
    pushOutputRow(removedFieldValue + " " + keptFieldValue);

    return true;
}

/**
 * One-time setup run for the first row: reads the OUT1, OUT2 and REMOVE
 * parameters, resolves each named field to its position in the input row
 * metadata, records the input/output field counts and clears {@code first}.
 *
 * @throws KettleException declared for callers; propagated from the helpers used here
 */
private void processMetadata() throws KettleException {
    // Resolve each configured field name together with its input position.
    outFieldName1 = getParameter("OUT1");
    outFieldIndex1 = getInputRowMeta().indexOfValue(outFieldName1);

    outFieldName2 = getParameter("OUT2");
    outFieldIndex2 = getInputRowMeta().indexOfValue(outFieldName2);

    removeFieldName = getParameter("REMOVE");
    removeFieldIndex = getInputRowMeta().indexOfValue(removeFieldName);

    // Field counts of the incoming and outgoing row layouts.
    inputRowMetaSize = data.inputRowMeta.size();
    outputRowMetaSize = data.outputRowMeta.size();

    first = false;
}


/**
 * Builds one output row and emits it with data.outputRowMeta.
 * The OUT1 field is copied from the input row, the OUT2 field receives
 * {@code content}, and the REMOVE field is set to an empty string.
 *
 * @param content value to store in the OUT2 field
 * @throws KettleException if emitting the row fails
 */
private void pushOutputRow(String content) throws KettleException {
    Object[] result = RowDataUtil.allocateRowData(outputRowMetaSize);

    for (int idx = 0; idx < inputRow.length; idx++) {
        if (idx == outFieldIndex1) {
            result[idx] = inputRow[idx];
            continue;
        }
        if (idx == outFieldIndex2) {
            result[idx] = content;
            continue;
        }
        if (idx == removeFieldIndex) {
            // The field is only blanked here; it is still part of the
            // output row layout, so it is not actually removed.
            result[idx] = "";
        }
    }

    putRow( data.outputRowMeta, result );
}

Solution

  • All that was required was:

    1. save data.outputRowMeta in a variable of type RowMetaInterface (in my case rowMeta);
    2. call the rowMeta.removeValueMeta method for it with the name or index of the field to delete;
    3. look up the output field indexes and the output row size using rowMeta instead of getInputRowMeta();
    4. in the putRow() method, use rowMeta as the first parameter.

    ``

    // Field names, read from the OUT1, OUT2 and REMOVE parameters in processMetadata().
    private String outFieldName1;
    private String outFieldName2;
    private String removeFieldName;
    
    // Positions of OUT1 and OUT2 in rowMeta, i.e. after the REMOVE field has been dropped.
    private int outFieldIndex1;
    private int outFieldIndex2;
    
    // Row most recently returned by getRow(); processRow() treats null as end of input.
    private Object[] inputRow;
    
    // Field counts of the input row metadata and of rowMeta, captured in processMetadata().
    private int inputRowMetaSize;
    private int outputRowMetaSize;
    // Output row metadata without the REMOVE field; passed to putRow().
    private RowMetaInterface rowMeta;
    
    /**
     * Handles one incoming row: joins the values of the REMOVE and OUT2 fields
     * with a single space and hands the result to pushOutputRow().
     *
     * @param smi step metadata passed in by the caller (not used in this method)
     * @param sdi step data passed in by the caller (not used in this method)
     * @return {@code false} when getRow() returns no row, {@code true} otherwise
     * @throws KettleException if reading the row or its fields fails
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
    {
        inputRow = getRow();
        boolean endOfInput = (inputRow == null);
        if (endOfInput) {
            setOutputDone();
            return false;
        }

        // Parameter and index lookup happens once, on the first row only.
        if (first) {
            processMetadata();
        }

        // Both values are read from the input layout, which still has the REMOVE field.
        String removedFieldValue = get(Fields.In, removeFieldName).getString(inputRow);
        String keptFieldValue = get(Fields.In, outFieldName2).getString(inputRow);
        pushOutputRow(removedFieldValue + " " + keptFieldValue);

        return true;
    }
    
    /**
     * One-time setup run for the first row: reads the OUT1, OUT2 and REMOVE
     * parameters, drops the REMOVE field from the output row metadata, and
     * resolves the OUT1/OUT2 positions and the field count against that
     * trimmed metadata. Clears {@code first} when done.
     *
     * @throws KettleException propagated from the metadata calls, e.g. from
     *         removing the REMOVE field
     */
    private void processMetadata() throws KettleException {
        outFieldName1 = getParameter("OUT1");
        outFieldName2 = getParameter("OUT2");
        removeFieldName = getParameter("REMOVE");

        inputRowMetaSize = data.inputRowMeta.size();

        // rowMeta refers to the same object as data.outputRowMeta, so the
        // removal below changes data.outputRowMeta as well.
        rowMeta = data.outputRowMeta;
        rowMeta.removeValueMeta(removeFieldName);

        // Resolved after the removal so the indexes are positions in the emitted row.
        outFieldIndex1 = rowMeta.indexOfValue(outFieldName1);
        outFieldIndex2 = rowMeta.indexOfValue(outFieldName2);

        // Taken once, after the removal (the earlier pre-removal assignment was
        // always overwritten and has been dropped).
        outputRowMetaSize = rowMeta.size();

        first = false;
    }
    
    // Lazily built lookup: for each position in rowMeta, the position of the
    // same-named field in the input row metadata, or -1 if the input has none.
    private int[] inputIndexByOutputIndex;

    /**
     * Builds one output row laid out according to {@code rowMeta} and emits it.
     * The OUT2 field receives {@code content}; every other field in
     * {@code rowMeta} is copied from the same-named field of the input row.
     *
     * Output positions are mapped to input positions by name because dropping
     * the REMOVE field shifts every field behind it, so an index in
     * {@code rowMeta} cannot be used directly on {@code inputRow}.
     *
     * @param content value to store in the OUT2 field
     * @throws KettleException if emitting the row fails
     */
    private void pushOutputRow(String content) throws KettleException {
        if (inputIndexByOutputIndex == null) {
            // Built once; later rows reuse the lookup.
            inputIndexByOutputIndex = new int[outputRowMetaSize];
            for (int outIdx = 0; outIdx < outputRowMetaSize; ++outIdx) {
                String fieldName = rowMeta.getValueMeta(outIdx).getName();
                inputIndexByOutputIndex[outIdx] = getInputRowMeta().indexOfValue(fieldName);
            }
        }

        Object[] outRow = RowDataUtil.allocateRowData(outputRowMetaSize);

        for (int outIdx = 0; outIdx < outputRowMetaSize; ++outIdx) {
            int inIdx = inputIndexByOutputIndex[outIdx];
            if (outIdx == outFieldIndex2) {
                outRow[outIdx] = content;
            } else if (inIdx >= 0 && inIdx < inputRow.length) {
                outRow[outIdx] = inputRow[inIdx];
            }
            // Fields with no input counterpart stay null.
        }

        putRow( rowMeta, outRow );
    }
    

    ``