Search code examples
mapsgsonbean-io

Flat file to json conversion using beanIo


I am trying to parse a fixed-length flat file into JSON using BeanIO.

Code:

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Map;

import org.beanio.BeanIOConfigurationException;
import org.beanio.BeanReader;
import org.beanio.StreamFactory;
import org.junit.Test;

import com.google.gson.Gson;

/**
 * Reads a flat data file through BeanIO and prints every record it yields as JSON.
 *
 * <p>The mapping file ({@code pattern-mapping.xml}) and the data file
 * ({@code employee.csv}) are both loaded from the classpath. Each object returned by
 * the {@link BeanReader} is treated as a {@link Map} and serialized with Gson.
 */
public class EmployeeBeanIOHandlerTest {

    /**
     * Parses the data file with the {@code empData} stream definition and prints one
     * line per object read, in the form {@code <recordName>: <json>}.
     *
     * <p>Configuration and I/O errors are reported via {@code printStackTrace()}, as
     * before. A missing classpath resource now fails with a descriptive
     * {@link IllegalStateException} instead of a {@link NullPointerException} raised
     * from the cleanup code.
     */
    @Test
    public void testHandleEmployee() {

        // mapping pattern file
        String mappingPatternFile = "pattern-mapping.xml";

        // data file (csv)
        String objectFile = "employee.csv";

        // stream name defined in pattern mapping file
        String streamName = "empData";

        Gson gson = new Gson();

        BeanReader beanReader = null;
        Reader reader = null;
        InputStream in = null;

        try {

            System.out.println("## RESULT FOR " + objectFile + " ##");

            // create a StreamFactory
            StreamFactory factory = StreamFactory.newInstance();

            // load the setting file
            in = openResource(mappingPatternFile);

            // get input stream reader of object file (data file)
            reader = new InputStreamReader(openResource(objectFile));

            // load input stream to stream factory
            factory.load(in);

            beanReader = factory.createReader(streamName, reader);
            Map<?, ?> record = null;
            while ((record = (Map<?, ?>) beanReader.read()) != null) {
                System.out.println(beanReader.getRecordName() + ": "
                        + gson.toJson(record));
            }

        } catch (BeanIOConfigurationException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close each resource independently: anything left null by an early
            // failure is skipped, and one failing close() does not prevent the
            // remaining resources from being released.
            closeAndReport(in);
            try {
                if (beanReader != null) {
                    beanReader.close();
                }
            } finally {
                closeAndReport(reader);
            }
        }
    }

    /**
     * Opens a classpath resource using this class's class loader.
     *
     * @param resourceName name of the resource to open
     * @return an open stream for the resource, never {@code null}
     * @throws IllegalStateException if the resource cannot be found
     */
    private InputStream openResource(String resourceName) {
        InputStream stream = this.getClass().getClassLoader()
                .getResourceAsStream(resourceName);
        if (stream == null) {
            throw new IllegalStateException(
                    "Resource not found on classpath: " + resourceName);
        }
        return stream;
    }

    /**
     * Closes the given resource if it is non-null, printing (not propagating) any
     * {@link IOException} raised by {@code close()}.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

However, this is the output I see:

header: {"id":"Header","date":"01012013"}

emp: {"lastName":"Lilik","title":"Senior Developer","hireDate":"Oct 1, 2009 12:00:00 AM","salary":7500000,"firstName":"Robertus"}

emp: {"lastName":"Doe","title":"Architect","hireDate":"Jan 15, 2008 12:00:00 AM","salary":8000000,"firstName":"Jane"}

emp: {"lastName":"Anderson","title":"Manager","hireDate":"Mar 18, 2006 12:00:00 AM","salary":9000000,"firstName":"Jon"}

trailer: {"id":"Trailer","count":"3"}

This generates a separate JSON object for each record found.

Reference site: http://www.sourcefreak.com/2013/06/painless-flat-file-parsing-with-beanio/

Below is my requirement:

  1. I want a single, consolidated JSON document for the whole file.
  2. When a record type occurs more than once, those records should be collected into a JSON array.

I would appreciate any help with this.


Solution

  • This answer is based on the data and pattern-mapping.xml file found in the link provided by the OP.

    Data:

    Header,01012013
    Robertus,Lilik,Senior Developer,"75,000,00",10012009
    Jane,Doe,Architect,"80,000,00",01152008
    Jon,Anderson,Manager,"90,000,00",03182006
    Footer,3

    Mapping File:
    This is the modified pattern-mapping.xml file. Note the use of a <group> element (myGroup) to encapsulate everything in a single group, which forces the BeanReader to read everything in one go. I also changed maxOccurs to 1 (one) for both the header and the trailer (footer) records, and added the collection="list" attribute to the `emp` record.

    <?xml version='1.0' encoding='UTF-8' ?>
    <beanio xmlns="http://www.beanio.org/2012/03" 
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="http://www.beanio.org/2012/03 http://www.beanio.org/2012/03/mapping.xsd">
    
      <!-- Stream definition "empData": CSV input. -->
      <stream name="empData" format="csv">
        <!-- Single enclosing group bound to a map: the header, emp and trailer
             records below are all nested under "myGroup". -->
        <group name="myGroup" class="map">
          <!-- Header record: identified by its "id" field matching the literal
               "Header"; limited to one occurrence. -->
          <record name="header" class="map" ridLength="0-2" maxOccurs="1">
            <field name="id" rid="true" maxOccurs="1" literal="Header" />
            <field name="date" />
          </record>
    
          <!-- Employee record: bound to a map; collection="list" gathers the
               repeated emp records into a list. -->
          <record name="emp" class="map" ridLength="4-5" collection="list">
            <field name="firstName" />
            <field name="lastName" />
            <field name="title" />
            <!-- salary is converted to BigDecimal using the given number format -->
            <field name="salary" type="java.math.BigDecimal" format="#,###,###,00" />
            <!-- hireDate is converted to java.util.Date (MMddyyyy); minOccurs="0" -->
            <field name="hireDate" type="java.util.Date" format="MMddyyyy" minOccurs="0" />
          </record>
    
          <!-- Trailer record: limited to one occurrence. -->
          <record name="trailer" class="map" ridLength="2" maxOccurs="1">
            <field name="id" />
            <field name="count" />
          </record>
        </group>
      </stream>
    </beanio>
    

    Using the test case supplied and the modified mapping file, we get this result (reformatted by me):

    myGroup: {
      "trailer": {
        "count": "3",
        "id": "Footer"
      },
      "header": {
        "date": "01012013",
        "id": "Header"
      },
      "emp": [
        {
          "firstName": "Robertus",
          "lastName": "Lilik",
          "hireDate": "Oct 1, 2009 12:00:00 AM",
          "title": "Senior Developer",
          "salary": 7500000
        },
        {
          "firstName": "Jane",
          "lastName": "Doe",
          "hireDate": "Jan 15, 2008 12:00:00 AM",
          "title": "Architect",
          "salary": 8000000
        },
        {
          "firstName": "Jon",
          "lastName": "Anderson",
          "hireDate": "Mar 18, 2006 12:00:00 AM",
          "title": "Manager",
          "salary": 9000000
        }
      ]
    }
    

    Hope this helps