I have the following source code, which takes an uploaded CSV file and writes it to a table in BigQuery. I need to add a check so that the CSV file is saved to the table only if it contains 5 rows. If it does not contain 5 rows, the process should stop.
code
# Read the uploaded CSV from /tmp, collect the header and the transformed
# rows in lst_csvfile, then write the result back and upload it to GCS.
with open('/tmp/{}'.format(input_file), "r") as csvfile:
    # Number of lines in the file minus one for the header line.
    # NOTE: this counts physical lines, and the value is not used yet.
    lines = len(list(csvfile))-1
    # list(csvfile) consumed the file, so rewind before parsing it as CSV.
    csvfile.seek(0)
    reader = csv.reader(csvfile)
    for i, row in enumerate(reader):
        # add header
        if add_header:
            if (i == 0):
                header_value = row[0:]
                lst_csvfile.append(header_value)
                add_header = False
        # add rows
        if (i > 0):
            # transform cpf
            new_row = [trata_cpf(row[0]), row[1], row[2]]
            lst_csvfile.append(new_row)
# write gcs
# NOTE(review): db_data is not built in this snippet; presumably it is
# derived from lst_csvfile elsewhere -- confirm.
db_data.to_csv('/tmp/{}'.format(input_file) ,index=False)
gcs_upload('{}'.format(input_file), '/tmp/{}'.format(input_file), gcs_bucket_temp)
print('Encrypt File DONE: {}'.format(input_file))
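You have the right idea using `lines = len(list(csvfile))-1` to determine how many non-header lines (records) there are in the file. You can add a simple `if` statement to skip the loop or return from the method. (The check below accepts files with at least five records; if the file must contain exactly five, test `lines != 5` instead.)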
with open('/tmp/{}'.format(input_file), "r") as csvfile:
lines = len(csvfile.readlines()) - 1
csvfile.seek(0)
reader = csv.reader(csvfile)
if lines < 5:
return # assuming you do not want the last 3 lines to execute
for i, row in enumerate(reader):
# rest of code
If you need the final lines to execute regardless of the row count, wrap the loop in an `if` statement instead of returning:
with open('/tmp/{}'.format(input_file), "r") as csvfile:
    # Count the data rows: every line in the file minus the header line.
    lines = len(csvfile.readlines()) - 1
    # readlines() consumed the file, so rewind before parsing it as CSV.
    csvfile.seek(0)
    reader = csv.reader(csvfile)
    # Only process the rows when the file has at least five records.
    if lines >= 5:
        for i, row in enumerate(reader):
            ...  # rest of code
# write gcs
# These lines sit outside the `if`, so they run whatever the row count is.
db_data.to_csv('/tmp/{}'.format(input_file) ,index=False)
gcs_upload('{}'.format(input_file), '/tmp/{}'.format(input_file), gcs_bucket_temp)
print('Encrypt File DONE: {}'.format(input_file))