Search code examples
javajpa-2.0hbasedatanucleuskundera

JPA2 with HBase datastore using Datanucleus writing duplicate rows


I'm creating a simple Java web app that writes one row of data to an HBase table via JPA2 (using DataNucleus). Everything is working, except that it writes 2 duplicate rows instead of just 1 row.

Results

ROW                                        COLUMN+CELL                                                                                                              
 402881813acfed78013acfed78e00000          column=ACCOUNT_TABLE:FIRSTNAME, timestamp=1352108207916, value=testname                                                     
 402881813acfed78013acfed78e00000          column=ACCOUNT_TABLE:ID, timestamp=1352108207916, value=402881813acfed78013acfed78e00000                                 
 402881813acfed78013acfed78e00000          column=ACCOUNT_TABLE:LASTNAME, timestamp=1352108207916, value=testname2                                                     
 402881813acfed78013acfed78e00000          column=ACCOUNT_TABLE:LEVEL, timestamp=1352108207916, value=\x00\x00\x00\x03                                              
 402881813acfed78013acfedf7e20001          column=ACCOUNT_TABLE:FIRSTNAME, timestamp=1352108210172, value=testname                                                     
 402881813acfed78013acfedf7e20001          column=ACCOUNT_TABLE:ID, timestamp=1352108210172, value=402881813acfed78013acfedf7e20001                                 
 402881813acfed78013acfedf7e20001          column=ACCOUNT_TABLE:LASTNAME, timestamp=1352108210172, value=testname2                                                     
 402881813acfed78013acfedf7e20001          column=ACCOUNT_TABLE:LEVEL, timestamp=1352108210172, value=\x00\x00\x00\x03                                              
2 row(s) in 0.1270 seconds

Datanucleus version in pom.xml

<org.datanucleus-version>3.1.1</org.datanucleus-version>

dependencies in pom.xml

 <dependencies>
        <!-- JPA 2.0 Spec -->
        <dependency>
            <groupId>org.apache.geronimo.specs</groupId>
            <artifactId>geronimo-jpa_2.0_spec</artifactId>
            <version>1.1</version>
        </dependency>

        <!-- JDO API -->
        <dependency>
            <groupId>javax.jdo</groupId>
            <artifactId>jdo-api</artifactId>
            <version>3.0</version>
        </dependency>

        <!-- DataNucleus HBase -->
        <dependency>
            <groupId>org.datanucleus</groupId>
            <artifactId>datanucleus-hbase</artifactId>
            <version>${org.datanucleus-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.0.4</version>
            <exclusions>
                <exclusion>
                    <groupId>org.codehaus.jackson</groupId>
                    <artifactId>jackson-mapper-asl</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.codehaus.jackson</groupId>
                    <artifactId>jackson-core-asl</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase</artifactId>
            <version>0.94.0</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.3</version>
        </dependency>

        <!-- DataNucleus Core -->
        <dependency>
            <groupId>org.datanucleus</groupId>
            <artifactId>datanucleus-core</artifactId>
            <version>${org.datanucleus-version}</version>
            <scope>runtime</scope>
        </dependency>

        <!-- DataNucleus Enhancer -->
        <dependency>
            <groupId>org.datanucleus</groupId>
            <artifactId>datanucleus-enhancer</artifactId>
            <version>${org.datanucleus-version}</version>
        </dependency>

        <!-- DataNucleus JPA api -->
        <dependency>
            <groupId>org.datanucleus</groupId>
            <artifactId>datanucleus-api-jpa</artifactId>
            <version>${org.datanucleus-version}</version>
        </dependency>

Plugin in pom.xml

           <plugin>
                <groupId>org.datanucleus</groupId>
                <artifactId>maven-datanucleus-plugin</artifactId>
                <version>${org.datanucleus-version}</version>
                <configuration>
                    <api>JPA</api>
                    <log4jConfiguration>${basedir}/log4j.properties</log4jConfiguration>
                    <verbose>true</verbose>
                    <persistenceUnitName>hbase-test</persistenceUnitName>
                </configuration>
                <executions>
                    <execution>
                        <phase>process-classes</phase>
                        <goals>
                            <goal>enhance</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

Account.java

package org.test.model;
import javax.persistence.*;
import java.io.Serializable;

/**
 * JPA entity mapped to the {@code ACCOUNT_TABLE} table.
 *
 * <p>Each persistent field is bound to a column whose name carries the
 * {@code ACCOUNT_TABLE:} prefix. The identifier is a {@link String} whose
 * value is produced by the persistence provider
 * ({@link GenerationType#AUTO}).
 */
@Entity
@Table(name = "ACCOUNT_TABLE")
public class Account implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Provider-generated identifier of this account. */
    @Id
    @GeneratedValue(strategy = GenerationType.AUTO)
    @Column(name = "ACCOUNT_TABLE:ID")
    private String id;

    /** First name of the account holder. */
    @Column(name = "ACCOUNT_TABLE:FIRSTNAME")
    private String firstName;

    /** Last name of the account holder. */
    @Column(name = "ACCOUNT_TABLE:LASTNAME")
    private String lastName;

    /** Account level; a freshly constructed instance starts at 0. */
    @Column(name = "ACCOUNT_TABLE:LEVEL")
    private int level;

    /** No-argument constructor; leaves every field at its default value. */
    public Account() {
    }

    /**
     * Creates an account with the given names and level. The identifier is
     * left unset.
     *
     * @param firstName first name of the account holder
     * @param lastName  last name of the account holder
     * @param level     account level
     */
    public Account(String firstName, String lastName, int level) {
        this.firstName = firstName;
        this.lastName = lastName;
        this.level = level;
    }

    /** @return the identifier, or {@code null} if none has been assigned yet */
    public String getId() {
        return id;
    }

    /** @param id the identifier to assign */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the first name */
    public String getFirstName() {
        return firstName;
    }

    /** @param firstName the first name to assign */
    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    /** @return the last name */
    public String getLastName() {
        return lastName;
    }

    /** @param lastName the last name to assign */
    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    /** @return the account level */
    public int getLevel() {
        return level;
    }

    /** @param level the account level to assign */
    public void setLevel(int level) {
        this.level = level;
    }
}

persistence.xml

<persistence xmlns="http://java.sun.com/xml/ns/persistence"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://java.sun.com/xml/ns/persistence http://java.sun.com/xml/ns/persistence/persistence_1_0.xsd" version="1.0">

    <persistence-unit name="hbase-test" transaction-type="RESOURCE_LOCAL">
        <provider>org.datanucleus.api.jpa.PersistenceProviderImpl</provider>
        <mapping-file>org/test/model/orm.xml</mapping-file>
        <class>org.test.model.Account</class>
        <exclude-unlisted-classes>true</exclude-unlisted-classes>

        <properties>
            <property name="datanucleus.storeManagerType" value="hbase" />
            <property name="datanucleus.ConnectionURL" value="hbase:localhost:60010"/>
            <property name="datanucleus.ConnectionUserName" value=""/>
            <property name="datanucleus.ConnectionPassword" value=""/>
            <property name="datanucleus.autoCreateSchema" value="true"/>
            <property name="datanucleus.autoCreateTables" value="true" />
            <property name="datanucleus.autoCreateColumns" value="true" />
            <property name="datanucleus.validateTables" value="true"/>
            <property name="datanucleus.validateConstraints" value="false"/>
            <property name="datanucleus.Optimistic" value="false"/>
            <property name="datanucleus.Multithreaded" value="true" />
        </properties>
    </persistence-unit>
</persistence>

orm.xml

<?xml version="1.0" encoding="UTF-8" ?>
    <entity-mappings xmlns="http://java.sun.com/xml/ns/persistence/orm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/persistence/orm http://java.sun.com/xml/ns/persistence/orm_1_0.xsd" version="1.0">
        <description>JPA Mapping file for Test</description>
        <package>org.test.model</package>
        <entity class="org.test.model.Account" name="Account">
            <table name="ACCOUNT_TABLE" />
            <attributes>
                <id name="id">
                    <generated-value strategy="AUTO"/>
                </id>
                <basic name="firstName">
                    <column name="ACCOUNT_TABLE:FIRSTNAME"/>
                </basic>
                <basic name="lastName">
                    <column name="ACCOUNT_TABLE:LASTNAME"/>
                </basic>
                <basic name="level">
                    <column name="ACCOUNT_TABLE:LEVEL"/>
                </basic>
            </attributes>
        </entity>
    </entity-mappings>

TestAddAccountRow.java

package org.test.app;

import org.test.model.Account;
import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.EntityTransaction;
import javax.persistence.Persistence;

/**
 * Small driver that writes one {@link Account} row through the
 * {@code hbase-test} persistence unit.
 */
public class TestAddAccountRow {

    /**
     * Persists a single hard-coded account ({@code "testname"},
     * {@code "testname2"}, level {@code 3}) inside its own transaction.
     *
     * <p>Each invocation persists a brand-new {@link Account} instance, so
     * calling this method more than once stores more than one row.
     *
     * <p>The entity manager factory and entity manager created here are
     * always closed before the method returns, and a transaction that is
     * still active after a failed persist/commit is rolled back.
     */
    public void addAccount() {
        EntityManagerFactory emf = Persistence.createEntityManagerFactory("hbase-test");
        try {
            EntityManager em = emf.createEntityManager();
            try {
                EntityTransaction tx = em.getTransaction();
                tx.begin();
                try {
                    em.persist(new Account("testname", "testname2", 3));
                    tx.commit();
                } finally {
                    // Still active here means persist() or commit() threw:
                    // undo the partial work instead of leaving it dangling.
                    if (tx.isActive()) {
                        tx.rollback();
                    }
                }
            } finally {
                // JPA 2.0 EntityManager is not AutoCloseable, so close by hand.
                em.close();
            }
        } finally {
            emf.close();
        }
    }
}

Solution

  • UPDATE

    @Datanucleus

    I had code in a Spring MVC controller calling the DAO (adding the Account object). I don't know why, but this was causing the duplicate rows; DataNucleus itself works perfectly.


    While I couldn't find out why it was creating duplicate rows, I swapped out DataNucleus for Kundera, which seems to solve the problem of duplicate rows.

    I think there is a bug in DataNucleus.


    Kundera version number

    <kundera-version>2.1</kundera-version>
    

    pom.xml dependencies

            <dependency>
                <groupId>javax.persistence</groupId>
                <artifactId>persistence-api</artifactId>
                <version>2.0</version>
            </dependency>
    
            <dependency>
                <groupId>com.impetus.core</groupId>
                <artifactId>kundera-core</artifactId>
                <version>${kundera-version}</version>
            </dependency>
    
            <dependency>
                <groupId>com.impetus.client</groupId>
                <artifactId>kundera-hbase</artifactId>
                <version>${kundera-version}</version>
                <exclusions>
                <exclusion>
                    <groupId>org.apache.hbase</groupId>
                    <artifactId>hbase</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-core</artifactId>
                </exclusion>
                </exclusions>
            </dependency>
    
            <dependency>
                <groupId>commons-collections</groupId>
                <artifactId>commons-collections</artifactId>
                <version>3.2.1</version>
            </dependency>
    
            <dependency>
                <groupId>commons-lang</groupId>
                <artifactId>commons-lang</artifactId>
                <version>2.4</version>
            </dependency>
    
            <dependency>
                <groupId>commons-logging</groupId>
                <artifactId>commons-logging</artifactId>
                <version>1.1.1</version>
            </dependency>
    
            <dependency>
                <groupId>commons-codec</groupId>
                <artifactId>commons-codec</artifactId>
                <version>1.2</version>
            </dependency>
    
            <dependency>
                <groupId>com.impetus.kundera.rest</groupId>
                <artifactId>kundera-rest</artifactId>
                <version>${kundera-version}</version>
            </dependency>
    
            <!-- HBase and Hadoop -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-core</artifactId>
                <version>1.0.4</version>
                <exclusions>
                    <exclusion>
                        <groupId>org.codehaus.jackson</groupId>
                        <artifactId>jackson-mapper-asl</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.codehaus.jackson</groupId>
                        <artifactId>jackson-core-asl</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase</artifactId>
                <version>0.94.0</version>
            </dependency>
    

    persistence.xml

    <?xml version="1.0" encoding="UTF-8" ?>
    <!--
      Copyright (C) 2010 Bartosch Warzecha, Matthias Weßendorf
    
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
      http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
    -->
    <persistence xmlns="http://java.sun.com/xml/ns/persistence"
                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xsi:schemaLocation="http://java.sun.com/xml/ns/persistence
        http://java.sun.com/xml/ns/persistence/persistence_2_0.xsd"
                 version="2.0">
    
        <persistence-unit name="hbase">
            <provider>com.impetus.kundera.KunderaPersistence</provider>
            <class>org.test.model.Account</class>
            <exclude-unlisted-classes>true</exclude-unlisted-classes>
            <properties>
                <property name="kundera.nodes" value="localhost" />
                <property name="kundera.port" value="60010" />
                <property name="kundera.keyspace" value="test" />
                <property name="kundera.dialect" value="hbase" />
                <property name="kundera.client.lookup.class" value="com.impetus.client.hbase.HBaseClientFactory" />
                <property name="kundera.cache.provider.class"
                          value="com.impetus.kundera.cache.ehcache.EhCacheProvider" />
                <property name="kundera.cache.config.resource" value="/ehcache-test.xml" />
                <property name="kundera.ddl.auto.prepare" value="create" />
            </properties>
        </persistence-unit>
    </persistence>