Below is the function I created to generate counts from a table, but in the query string I want to add a GROUP BY on a column 'xyz'. How can I do that?
from pyspark import SparkContext, SparkConf
from pyspark.sql import HiveContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
from pyspark.sql.types import *
db = 'database'
schema = 'Schema'
def getCount(table):
string = f"select count(*) as ct from {db}.{schema}." + table
df = spark.read.format(snowflake_name)\
.options(**sfOptions)\
.option('query', string).load()
return df
Well, one way would be to alter the f-string slightly:
string = f"select some_column, count(*) as ct from {db}.{schema}.{table} group by some_column"