Tags: python, python-3.x, apache-spark, pyspark, hive

Pyspark JDBC return all rows with column names


I'm querying a Spark (Hive) database table using Spark 3.2.1 on Python 3.7 with the code below.

These tables are fully accessible and can be manipulated with other tools like DBeaver, Power BI, and SSRS. Even a similar script in R returns the data correctly. But when I try to use this Python script, every row returned over JDBC contains only the column names instead of the data.

This is the code:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

url = 'jdbc:hive2://1.1.1.1:10000/default;transportMode=http;httpPath=cliservice'
table = 'schema.table_name'
username = 'username'
password = '123456'

remote_table = spark.read\
                    .format("jdbc")\
                    .option("driver", "org.apache.hive.jdbc.HiveDriver")\
                    .option("url", url)\
                    .option("dbtable", table)\
                    .option("user", username)\
                    .option("password", password)\
                    .load()\
                    .limit(2)

remote_table.show()
spark.stop()

I expect PySpark to return the actual data from my Hive tables.


Solution

  • This code resolves the problem by bypassing Spark's JDBC reader and connecting through jaydebeapi instead:

    # -*- coding: utf-8 -*-
    """
    Created on Fri Jun  2 07:32:51 2023
    
    @author: yfdantas
    """
    
    
    import jaydebeapi

    def spark_connect():
        # Connection details for HiveServer2 over HTTP transport
        jdbc_url = 'jdbc:hive2://1.1.1.1:10000/default;transportMode=http;httpPath=cliservice'
        jdbc_driver_class = "com.cloudera.hive.jdbc.HS2Driver"
        jdbc_user = 'username'
        jdbc_password = '123456'
        jdbc_jar = "E:/scripts/libs/HiveJDBC42.jar"

        # jaydebeapi loads the driver jar and opens a DB-API 2.0 connection
        conn = jaydebeapi.connect(
            jclassname=jdbc_driver_class,
            url=jdbc_url,
            driver_args=[jdbc_user, jdbc_password],
            jars=jdbc_jar
        )

        return conn

    def hive_close(conn):
        # Close the Hive connection when done
        conn.close()
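The code above only opens the connection; to actually read rows you still need a DB-API cursor. A minimal sketch of that step follows, assuming the `spark_connect()` function above. The `fetch_sample` and `rows_to_dicts` helpers are illustrative names, not part of the original answer.

```python
def rows_to_dicts(description, rows):
    """Pair each row with column names taken from cursor.description.

    `description` follows the DB-API 2.0 format: a sequence of 7-item
    tuples whose first element is the column name.
    """
    names = [col[0] for col in description]
    return [dict(zip(names, row)) for row in rows]

def fetch_sample(conn, table, limit=2):
    """Run a LIMIT query over the jaydebeapi connection and return dicts."""
    cursor = conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM {table} LIMIT {limit}")
        return rows_to_dicts(cursor.description, cursor.fetchall())
    finally:
        cursor.close()
```

Usage would be something like `fetch_sample(spark_connect(), 'schema.table_name')`, followed by `hive_close(conn)` once you are done.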