Search code examples
pythonapache-sparkpysparkspark-streaming

How to use writeStream in append mode


Getting the error "Output mode not supported when there are streaming aggregations on streaming DataFrames/Datasets without watermark". I wanted to get the output on the console.

class StructSpark:
  """Structured Streaming word count over a socket source.

  Reads comma-separated lines from ``address:port``, computes 5-minute
  windowed counts keyed on the first CSV field with a 10-minute watermark,
  and streams rows with count > 1 to the console in append output mode.
  """

  def __init__(self, address, port):
    # Socket source location.
    self.address = address
    self.port = port
    self.spark = SparkSession.builder.appName("StructuredWordcount").getOrCreate()

  # NOTE(review): restored as a method — the original paste had lost the
  # class-level indentation, leaving this a top-level function taking `self`.
  def getonline(self):
    """Start the streaming query and block until it terminates."""
    # Socket source; includeTimestamp gives us an event-time column to
    # watermark and window on.
    lines = self.spark.readStream.format('socket').option('host', self.address).option('port', self.port).option(
        'includeTimestamp', 'true').load()
    # Split each CSV line into an array of fields.
    words = lines.select(split(lines.value, ',').alias("value"), lines.timestamp)
    # BUG FIX: the original split words.value[0] (already a scalar field) a
    # second time by ',', yielding two identical array-typed columns. Use the
    # first field directly as the grouping key and keep the full array as value.
    words1 = words.select(words.value[0].alias("key"), words.value.alias("value"), lines.timestamp)
    # The watermark must be applied on the event-time column *before* the
    # aggregation — that is what makes append output mode legal here.
    windowedCount = words1.withWatermark("timestamp", "10 minutes").groupBy(
        window(words1.timestamp, "5 minutes", "5 minutes"), words1.key).count()
    windowedCount.createOrReplaceTempView("updates")
    count = self.spark.sql("select * from updates where count > 1")
    # BUG FIX: the original wrote str(count) to a local file here. That runs
    # once on the driver before the stream starts and records only the
    # DataFrame's repr (its schema), never any data. To persist results, start
    # a second writeStream with a file sink instead.
    query = count.writeStream.outputMode("append").format("console").start()
    query.awaitTermination()

Solution

  • Since you are performing an aggregation operation on your stream, you cannot use writeStream in append mode without a watermark. Either use 'complete' output mode, or apply writeStream before the aggregation operation.