Search code examples
javajdbchiveteradataexecutequery

executeQuery not returning result for Hive


I am trying to export data from Hive to Teradata. Below is the code I have for the same:

/* Code Start */
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveToTd {
private final static String tdUser = "**********";
private final static String tdPass = "**********";

private final static String hiveUser = "**********";
private final static String hivePass = "**********";

private static String driverName = "org.apache.hive.jdbc.HiveDriver";
/**
 * @param args
 * @throws ClassNotFoundException 
 * @throws SQLException 
 */
public static void main(String[] args) throws ClassNotFoundException, SQLException {
    // Get the Teradata connection 
    Class.forName("com.teradata.jdbc.TeraDriver");
    Connection tdcon = DriverManager.getConnection("jdbc:teradata://database.XXXXX.com/TMODE=ANSI,TYPE=FASTLOAD", tdUser, tdPass);
    System.out.println("Connected to Teradata.");
    
    // Get our hive connection
    Class.forName(driverName);
    System.out.println("Connecting to Hive.");
    Connection hivecon = DriverManager.getConnection("jdbc:hive2://bigdatabase.xxxxxx.com:10000/default", hiveUser, hivePass);
    System.out.println("Connected to Hive.");
    
    // Select our table from Hive
    
    Statement hst = hivecon.createStatement();
    System.out.println("Executing Statement");
    ResultSet hrs = hst.executeQuery("SELECT COL1, COL2, COL3 FROM db.table limit 100");
    
    System.out.println("Get DATA");
    int count= 0;
    if(hrs.next())
    {
        count++;
    }
  System.out.println(count);

The count is returned "1" and not 100. I have verified that I have more than 1 million records in the tables in Hive. What am I doing wrong? It just returns me the header row and that's it. I would have assumed the issue to be with the connection but then it gives me the right header row. So it has to be something else.

Update

So looks like the code is actually working. Thanks for helping me troubleshoot Thusitha.

The next part is more troubling. This is for Fast Load to TD.

// Empty the staging table
    tdcon.createStatement().executeUpdate("delete from dbname.staging_table");

    // Create prepared statement for Teradata
    System.out.println("Begin load to Teradata");
    tdcon.setAutoCommit(false);
    
    PreparedStatement ps = tdcon.prepareStatement("insert into dbname.staging_table values (?,?,?)");
    System.out.println("Start Fastload");
    int i;
    for (i = 1; hrs.next(); i++){
        ps.setString(1, hrs.getString(1));
        ps.setString(2, hrs.getString(2));
        ps.setString(3, hrs.getString(3));
        ps.addBatch();
        System.out.println(i);
        if (i % 10000 == 0){
            ps.executeBatch();
        }
    }
    
    
    
    if (i % 10000 != 0){
        ps.executeBatch();
    }
    
    
    
    tdcon.commit();
    tdcon.setAutoCommit(true);
    ps.close();
    hrs.close();
    
    sideLoad("dbname.staging_table", "dbname.final_table", tdcon);
    tdcon.close();
    hivecon.close();
    
}

public static int sideLoad(String fromTable, String toTable, Connection conn) throws SQLException{
    return (conn.createStatement().executeUpdate("INSERT INTO " + toTable + " SELECT * FROM " + fromTable));
    
}



}

The error I get after "start Fastload" message is:

Exception in thread "main" java.sql.SQLException: [Teradata JDBC Driver] [TeraJDBC 15.00.00.20] [Error 1103] [SQLState HY000] Cannot add an empty batch of rows to a database table
    at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:94)
    at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:64)
    at com.teradata.jdbc.jdbc.fastload.FastLoadManagerPreparedStatement.executeBatch(FastLoadManagerPreparedStatement.java:2049)
    at com.optus.insights.HiveToTd.main(HiveToTd.java:84)

Solution

  • You are having a if condition so count it will only go to the first row of the ResultSet and increment count 1

    If you want to iterate through all the rows use while loop instead of if changes your code as follows

    while(hrs.next()){
        count++;
    }