Search code examples
python, hadoop, hive, thrift, hue

How can I capture the logs of a query from HiveServer2 in real time with a Python client?


I use a modified version of pyhs2 (https://pypi.python.org/pypi/pyhs2) that adds the ability to run async queries, plus additional methods of TCLIService.Client (GetLog, send_GetLog, recv_GetLog) taken from the Hue sources (https://github.com/cloudera/hue/blob/master/apps/beeswax/gen-py/TCLIService/TCLIService.py#L739).

But when I call the TCLIService.Client.GetLog method, I get an error:

$ python example.py 
Traceback (most recent call last):
  File "example.py", line 85, in <module>
    rq = client.GetLog(lq)
  File "/Users/toly/hive_streaming/libs/pyhs4/TCLIService/TCLIService.py", line 757, in GetLog
    return self.recv_GetLog()
  File "/Users/toly/hive_streaming/libs/pyhs4/TCLIService/TCLIService.py", line 773, in recv_GetLog
    raise x
thrift.Thrift.TApplicationException: Invalid method name: 'GetLog'

In the script I use the HiveServer2 instance from the Cloudera VM. The same server, I guess, is used by Hue, where this works successfully. In addition, I tried client_protocol values in the range from 0 to 7 when creating the session.

import time
import sasl

from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport
from libs.pyhs4.cloudera.thrift_sasl import TSaslClientTransport


from libs.pyhs4.TCLIService import TCLIService
from libs.pyhs4.TCLIService.ttypes import TOpenSessionReq, TGetTablesReq, TFetchResultsReq,\
    TStatusCode, TGetResultSetMetadataReq, TGetColumnsReq, TType, TTypeId, \
    TExecuteStatementReq, TGetOperationStatusReq, TFetchOrientation, TCloseOperationReq, \
    TCloseSessionReq, TGetSchemasReq, TCancelOperationReq, TGetLogReq

# Connection settings and the test statement used by the script below.
# NOTE(review): `auth` is not referenced anywhere in the code shown here; the
# transport setup passes the literal "PLAIN" instead.
auth = 'PLAIN'
username = 'apanin'
password = 'none'
host = 'cloudera'
port = 10000
test_hql1 = 'select count(*) from test_text'


def sasl_factory():
    """Create and initialise a SASL client carrying the module credentials.

    The module-level ``username`` and ``password`` are set as attributes on a
    fresh ``sasl.Client`` before ``init()`` is called on it.

    Returns:
        The initialised ``sasl.Client`` instance.
    """
    client = sasl.Client()
    credentials = (("username", username), ("password", password))
    for attr_name, attr_value in credentials:
        client.setAttr(attr_name, attr_value)
    client.init()
    return client


def get_type(typeDesc):
    """Resolve the type carried by *typeDesc*.

    Each element of ``typeDesc.types`` is inspected in order. For an element
    with a primitive entry, the name looked up in
    ``TTypeId._VALUES_TO_NAMES`` is returned. Otherwise the first populated
    entry among map, union, array, struct and user-defined is returned as-is.

    Returns:
        The type name or entry object described above, or ``None`` when no
        element of ``typeDesc.types`` has any entry populated.
    """
    # Probe order matters: it mirrors the precedence of the entry kinds.
    complex_entry_names = (
        "mapEntry",
        "unionEntry",
        "arrayEntry",
        "structEntry",
        "userDefinedTypeEntry",
    )
    for entry in typeDesc.types:
        primitive = entry.primitiveEntry
        if primitive is not None:
            return TTypeId._VALUES_TO_NAMES[primitive.type]
        for entry_name in complex_entry_names:
            candidate = getattr(entry, entry_name)
            if candidate is not None:
                return candidate
    return None


def get_value(colValue):
    """Return the payload of the first populated typed slot of *colValue*.

    The slots are probed in a fixed order (bool, byte, i16, i32, i64, double,
    string). The ``value`` attribute of the first slot that is not ``None`` is
    returned.

    Returns:
        The slot's ``value``, or ``None`` when every slot is empty.
    """
    slot_names = (
        "boolVal",
        "byteVal",
        "i16Val",
        "i32Val",
        "i64Val",
        "doubleVal",
        "stringVal",
    )
    for slot_name in slot_names:
        slot = getattr(colValue, slot_name)
        if slot is not None:
            return slot.value
    return None


# Open a SASL-wrapped Thrift transport to the server and build the client on it.
sock = TSocket(host, port)
transport = TSaslClientTransport(sasl_factory, "PLAIN", sock)
client = TCLIService.Client(TBinaryProtocol(transport))
transport.open()

# Create a session; its handle is needed for every later request.
res = client.OpenSession(TOpenSessionReq(username=username, password=password))
session = res.sessionHandle

# Submit the statement with runAsync=True so its status can be polled below.
query1 = TExecuteStatementReq(session, statement=test_hql1, confOverlay={}, runAsync=True)
response1 = client.ExecuteStatement(query1)
opHandle1 = response1.operationHandle


# Poll once per second until the operation reports state 2.
while True:
    time.sleep(1)

    q1 = TGetOperationStatusReq(operationHandle=opHandle1)
    res1 = client.GetOperationStatus(q1)

    # This is the call that fails with "Invalid method name: 'GetLog'" in the
    # traceback shown above.
    lq = TGetLogReq(opHandle1)
    rq = client.GetLog(lq)

    # NOTE(review): 2 is presumably TOperationState.FINISHED_STATE -- confirm
    # against the generated ttypes. Any other end state never exits this loop.
    if res1.operationState == 2:
        break


# Close the operation first, then the session it belongs to.
req = TCloseOperationReq(operationHandle=opHandle1)
client.CloseOperation(req)

req = TCloseSessionReq(sessionHandle=session)
client.CloseSession(req)

How can I capture the logs of a Hive query from HiveServer2 in real time?

UPD: the Hive version is 1.2.1.


Solution

  • To get the logs of an operation, use the FetchResults method with the parameter fetchType=1, which makes it return logs.

    Example usage:

    # Submit the statement asynchronously so that its progress can be polled.
    query1 = TExecuteStatementReq(session, statement=test_hql1, confOverlay={}, runAsync=True)
    response1 = client.ExecuteStatement(query1)
    opHandle1 = response1.operationHandle

    # Operation states after which no further progress is expected.
    # NOTE(review): numeric values are taken from the TOperationState enum in
    # Hive's TCLIService.thrift (2=FINISHED, 3=CANCELED, 4=CLOSED, 5=ERROR);
    # confirm them against the generated ttypes module in use.
    terminal_states = (2, 3, 4, 5)

    while True:
        time.sleep(1)

        q1 = TGetOperationStatusReq(operationHandle=opHandle1)
        res1 = client.GetOperationStatus(q1)

        # fetchType=1 makes FetchResults return the operation's logs rather
        # than the query's result rows; at most maxRows entries per call.
        request_logs = TFetchResultsReq(operationHandle=opHandle1, orientation=0, maxRows=10, fetchType=1)
        response_logs = client.FetchResults(request_logs)

        # The call form of print works on both Python 2 and Python 3.
        print(response_logs.results)

        # Stop on any terminal state, not only on success; otherwise a failed
        # or cancelled query would be polled forever.
        if res1.operationState in terminal_states:
            break