Search code examples
python, arcgis, arcpy, arcmap

How to check for empty values in fields of *multiple* shape files?


I have one main folder that contains multiple sub-folders. Each sub-folder contains one shapefile. I want to check every field of each shapefile for null values. If any field of a shapefile has null values, the script should print the shapefile name along with the field name.

I have found code but it only works for one shape file.

import arcpy
# Feature class (shapefile) whose attribute fields are inspected.
fc = r'C:\Y4YK\Muni'

# Only fields not flagged as required by arcpy are checked.
field_names = [f.name for f in arcpy.ListFields(fc) if not f.required]


def is_empty(value):
    """Return True when *value* is None or a string that is blank once stripped.

    Non-string values (numbers, dates, ...) are never considered empty.
    Checking for a ``strip`` method instead of calling ``str(value)`` avoids
    encoding errors on non-ASCII unicode text under Python 2.
    """
    return value is None or (hasattr(value, 'strip') and not value.strip())


# Names of the fields in which at least one empty value was seen.
empty_fields = set()

# A cursor cannot be opened on an empty field list, so skip the scan then.
if field_names:
    # The ``with`` block releases the cursor as soon as the scan is done.
    with arcpy.da.SearchCursor(fc, field_names) as cursor:
        for row in cursor:
            for name, value in zip(field_names, row):
                if is_empty(value):
                    empty_fields.add(name)

# Report once, after all rows were read, so each field is printed at most one
# time (in field order) instead of once per row.
for name in field_names:
    if name in empty_fields:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Field: "{}" has empty values'.format(name))

Solution

  • I think it should be faster to first use an SQL query to check whether any rows have None values, before reading each row with a cursor. (By the way, use the Data Access cursors (da.SearchCursor); they are a lot faster.)

    Try:

    import arcpy, os
    
    # Walk a folder tree and report, per shapefile, which fields contain NULLs.
    arcpy.env.overwriteOutput = True  # To be able to reuse the same layer name in MakeFeatureLayer

    shapefolder = r'C:\GIS\data\testdata'

    for path, _subdirs, files in os.walk(shapefolder):
        for name in files:
            # Compare case-insensitively so '.SHP' files are picked up as well.
            if not name.lower().endswith('.shp'):
                continue

            shapefile = os.path.join(path, name)
            fields_to_check = [f.name for f in arcpy.ListFields(shapefile) if not f.required]
            if not fields_to_check:
                # No fields would give an empty WHERE clause (matching every
                # row) and an empty cursor field list, so skip this shapefile.
                continue

            # Build 'Field1 IS NULL OR Field2 IS NULL OR ...'; the field names
            # are delimited for the shapefile's folder workspace so that names
            # clashing with SQL keywords still parse.
            sql = ' OR '.join(
                '{} IS NULL'.format(arcpy.AddFieldDelimiters(path, field))
                for field in fields_to_check
            )
            # Temporary layer holding only the rows with at least one NULL.
            arcpy.MakeFeatureLayer_management(in_features=shapefile, out_layer='layer', where_clause=sql)

            # Any matching row means there is something to report. Requiring
            # the match count to reach the total row count would miss
            # shapefiles where only some rows have NULLs.
            null_row_count = int(arcpy.GetCount_management(in_rows='layer').getOutput(0))
            if null_row_count == 0:
                continue

            # Collect the distinct names of the fields that hold a None.
            nonefields = set()
            with arcpy.da.SearchCursor('layer', fields_to_check) as cursor:
                for row in cursor:
                    nonefields.update(
                        field for field, value in zip(fields_to_check, row) if value is None
                    )

            if nonefields:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print('None value(s) in shapefile: {}, field(s): {}'.format(
                    shapefile, ', '.join(sorted(nonefields))))
    

    Should output something like:

    None value(s) in shapefile: C:\GIS\data\testdata\intertest.shp, field(s): fieldname1, fieldname2, fieldname10
    None value(s) in shapefile: C:\GIS\data\testdata\polygons.shp, field(s): blabla, blablabla