Search code examples
python, apache-kafka, python-multithreading, threadpoolexecutor, confluent-kafka-python

Not receiving messages from Kafka Topic


I am receiving None when calling poll() in this program, but I do get the messages when running kafka-console-consumer.bat from cmd. I can't figure out what exactly the problem is.

The execution starts from main.py

from queue import Queue
from concurrent.futures import ThreadPoolExecutor
import time
import json
from kafka_message_consumer import KafkaMessageConsumer
from kafka_discovery_executor import KafkaDiscoveryExecutor


# Load the Kafka settings that are handed to both the consumer and the executor.
with open('kafka_properties.json') as f:
    kafka_properties = json.load(f)

# The queue is the hand-off point between the two workers.
message_queue = Queue()
kafka_message_consumer = KafkaMessageConsumer(kafka_properties, message_queue)
kafka_discovery_executor = KafkaDiscoveryExecutor(message_queue, kafka_properties)

with ThreadPoolExecutor(max_workers=5) as executor:
    # Submit the bound method itself. Writing `run()` here would execute the
    # endless loop on the main thread and only try to submit its return value,
    # so nothing would ever be scheduled on the pool.
    executor.submit(kafka_message_consumer.run)
    time.sleep(1)
    executor.submit(kafka_discovery_executor.run)
    time.sleep(1)

The KafkaDiscoveryExecutor class consumes messages from the shared queue and processes them.

This is kafka_message_consumer.py

import logging
from confluent_kafka import Consumer


class KafkaMessageConsumer:
    """Poll a Kafka topic and push every received message onto a shared queue."""

    def __init__(self, kafka_properties, message_queue):
        """
        Remember the shared queue and create the subscribed Kafka consumer.

        :param kafka_properties: mapping read for 'bootstrap.servers' and 'group.id'.
        :param message_queue: object whose put() receives each consumed message.
        """
        self.message_queue = message_queue
        self.logger = logging.getLogger('KafkaMessageConsumer')
        self.kafka_stream_consumer = None
        self.create_consumer(kafka_properties)

    def create_consumer(self, kafka_properties):
        """
        Create an instance of Kafka Consumer with the consumer configuration properties
        and subscribes to the defined topic(s).
        """
        # Consumer configuration properties.
        consumer_config = {
            'bootstrap.servers': kafka_properties.get('bootstrap.servers'),
            'group.id': kafka_properties.get('group.id'),
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest',
        }

        # For SSL Security
        # consumer_config['security.protocol'] = 'SASL_SSL'
        # consumer_config['sasl.mechanisms'] = 'PLAIN'
        # consumer_config['sasl.username'] = ''
        # consumer_config['sasl.password'] = ''

        # Create the consumer using consumer_config.
        self.kafka_stream_consumer = Consumer(consumer_config)

        # Subscribe to the specified topic(s).
        self.kafka_stream_consumer.subscribe(['mytopic'])

    def run(self):
        """
        Poll the consumer in an endless loop and enqueue each valid message.

        Empty polls and messages carrying an error are reported with print()
        and skipped. The loop only ends when an exception escapes it; the
        consumer is closed before that exception propagates.
        """
        try:
            while True:
                msg = self.kafka_stream_consumer.poll(1.0)
                if msg is None:
                    # No message available within timeout.
                    print("Waiting for message or event/error in poll()")
                    continue
                if msg.error():
                    print("Error: {}".format(msg.error()))
                    continue

                # Consume the record: push the message into message_queue.
                try:
                    self.message_queue.put(msg)
                except Exception as e:
                    self.logger.critical("Error occured in kafka Consumer: {}".format(e))
        finally:
            # Always release the consumer, whatever ended the loop.
            self.kafka_stream_consumer.close()

The specified topic has events but I am getting None here and the print statement inside 'if msg is None:' is executing.


Solution

  • I am still not sure as to why the above code is not working as it should.

    Here are the changes I made to make this code work:

    1. I used the threading module instead of concurrent.futures
    2. I made the threads daemon threads
    3. I called the threading.Thread initializer (Thread.__init__) inside the constructors of the classes [KafkaMessageConsumer, KafkaDiscoveryExecutor]

    Here's main.py

    from queue import Queue
    import threading
    import time
    import json
    from kafka_message_consumer import KafkaMessageConsumer
    from kafka_discovery_executor import KafkaDiscoveryExecutor
    
    def main():
        """Build both workers around one shared queue, start them, then join them."""
        with open('kafka_properties.json') as properties_file:
            kafka_properties = json.load(properties_file)

        message_queue = Queue()

        # Construction order matches start order: consumer first, executor second.
        consumer_worker = KafkaMessageConsumer(kafka_properties, message_queue)
        executor_worker = KafkaDiscoveryExecutor(message_queue, kafka_properties)
        workers = (consumer_worker, executor_worker)

        # Start the workers one second apart.
        for worker in workers:
            worker.start()
            time.sleep(1)

        # Block until every worker has finished.
        for worker in workers:
            worker.join()

        time.sleep(1)


    if __name__ == "__main__":
        main()
    

    and kafka_message_consumer.py

    import logging
    from confluent_kafka import Consumer
    import threading
    
    
    class KafkaMessageConsumer(threading.Thread):
        """Daemon thread that polls a Kafka topic and enqueues each received message."""

        def __init__(self, kafka_properties, message_queue):
            """
            Initialise the thread as a daemon and create the subscribed consumer.

            :param kafka_properties: mapping read for 'bootstrap.servers' and 'group.id'.
            :param message_queue: object whose put() receives each consumed message.
            """
            # Request daemon mode through the Thread constructor. A class-level
            # `daemon = True` would shadow Thread's `daemon` property while the
            # constructor still copied the creating thread's daemon flag.
            super().__init__(daemon=True)
            self.message_queue = message_queue
            self.logger = logging.getLogger('KafkaMessageConsumer')
            self.kafka_stream_consumer = None
            self.create_consumer(kafka_properties)

        def create_consumer(self, kafka_properties):
            """
            Create an instance of Kafka Consumer with the consumer configuration properties
            and subscribes to the defined topic(s).
            """
            # Consumer configuration properties.
            consumer_config = {
                'bootstrap.servers': kafka_properties.get('bootstrap.servers'),
                'group.id': kafka_properties.get('group.id'),
                'enable.auto.commit': True,
                'auto.offset.reset': 'earliest',
            }

            # Create the consumer using consumer_config.
            self.kafka_stream_consumer = Consumer(consumer_config)

            # Subscribe to the specified topic(s).
            self.kafka_stream_consumer.subscribe(['mytopic'])

        def run(self):
            """
            Poll the consumer in an endless loop and enqueue each valid message.

            Empty polls and messages carrying an error are reported with print()
            and skipped. The loop only ends when an exception escapes it; the
            consumer is closed before that exception propagates.
            """
            try:
                while True:
                    msg = self.kafka_stream_consumer.poll(1.0)
                    if msg is None:
                        # No message available within timeout.
                        print("Waiting for message or event/error in poll()")
                        continue
                    if msg.error():
                        print("Error: {}".format(msg.error()))
                        continue

                    # Consume the record: push the message into message_queue.
                    try:
                        self.message_queue.put(msg)
                    except Exception as e:
                        self.logger.critical("Error occured in kafka Consumer: {}".format(e))
            finally:
                # A close() placed after `while True` can never run; doing it
                # here guarantees the consumer is released when the loop ends.
                self.kafka_stream_consumer.close()