Search code examples
bash, google-cloud-platform, scripting, gcloud

optimize my bash script with gcloud commands


Could you help me optimize this script? It collects the metadata of every instance across our whole organization, and it currently takes anywhere between 12 and 13 hours to complete. I want to run it multiple times a week; is there a way to optimize it and get the results faster?

#!/usr/bin/env bash
#
# Export every Compute Engine instance of every visible project to
# instances.csv, one row per instance, with the project's "organization" and
# "space" labels and the project ID added to each row.
#
# Progress and elapsed time are printed to stdout after each project.
#
# `set -e` is deliberately NOT enabled: a project whose instance listing fails
# (for example because the Compute Engine API is disabled there) must not
# abort the export of the remaining projects.

FILE="instances.csv"

# One row per project: "<projectId>,<organization label>,<space label>".
# Fetching the labels here replaces two `gcloud projects describe` calls per
# project, each of which pays the full gcloud start-up and API round-trip cost.
readonly PROJECT_FORMAT='csv[no-heading](projectId,labels.organization,labels.space)'

#######################################
# Print the wall-clock time spent since START_TIME.
# Globals:   SECONDS (read), START_TIME (read)
# Outputs:   "Elapsed time - M min S sec" on stdout
#######################################
print_elapsed() {
  local elapsed=$((SECONDS - START_TIME))
  echo 'Elapsed time -' "$((elapsed / 60)) min $((elapsed % 60)) sec"
}

# List the projects before touching the output file, so a failed listing does
# not wipe the results of a previous run.
if ! PROJECTS=$(gcloud projects list --format="$PROJECT_FORMAT"); then
  echo 'Failed to list projects' >&2
  exit 1
fi

# Start from a fresh file containing only the header (also works on the very
# first run, when there is nothing to remove).
echo 'name,organization,space,zone,machine_type,preemptible,internal_ip,external_ip,status,service_account,project_id' > "$FILE"

START_TIME=$SECONDS
COUNTER=1

# Collect the non-empty rows so the total is known up front.
project_rows=()
while IFS= read -r row; do
  [[ -n "$row" ]] && project_rows+=("$row")
done <<<"$PROJECTS"
TOTAL=${#project_rows[@]}

for row in "${project_rows[@]}"; do
  # Project IDs and label values cannot contain commas, so a plain split on
  # "," is safe; missing labels come through as empty strings.
  IFS=, read -r project_id ORG SPACE <<<"$row"

  echo "project  ${COUNTER} of ${TOTAL}"
  echo "$project_id"
  echo "$ORG"
  echo "$SPACE"

  # The label values are injected unquoted into the projection as constant
  # columns via format(); the argument gcloud receives is the same text the
  # per-project describe calls used to produce.
  # awk drops the CSV heading (first line) and appends the project ID column.
  gcloud compute instances list --project "$project_id" --format="csv(
   name,
   format(${ORG},''),
   format(${SPACE}, ''),
   zone.basename(),
   machineType.machine_type().basename(),
   scheduling.preemptible.yesno(yes=true, no=''),
   networkInterfaces[].networkIP.notnull().list():label=INTERNAL_IP,
   networkInterfaces[].accessConfigs[0].natIP.notnull().list():label=EXTERNAL_IP,
   status,
   serviceAccounts[0].email
  )" | awk -v pid="$project_id" 'NR > 1 { print $0 "," pid }' >> "$FILE"

  ((COUNTER += 1))
  print_elapsed
done

print_elapsed

Solution

  • I found the challenge interesting, especially the concurrency aspect in Go, so I implemented it.

    package main
    
    import (
        "context"
        "fmt"
        "google.golang.org/api/cloudresourcemanager/v1"
        "google.golang.org/api/compute/v1"
        "net/http"
        "strings"
        "sync"
    )
    
    // listSeparator joins multiple values (IP addresses, service-account
    // e-mails) inside a single CSV field; it is a character other than the
    // comma used between CSV columns.
    var listSeparator = ";"
    
    // main registers ListProject as the handler for every path and serves HTTP
    // on port 8080.
    func main() {
        http.HandleFunc("/", ListProject)
        // ListenAndServe only returns when serving fails (for example when the
        // port is already in use); report the reason instead of exiting silently.
        if err := http.ListenAndServe(":8080", nil); err != nil {
            fmt.Println(err)
        }
    }
    
    // ListProject is the HTTP handler that writes a CSV inventory of Compute
    // Engine instances to the response: a header row followed by one row per
    // instance received from the per-project worker goroutines.
    //
    // If either API client cannot be created, or the project listing fails, the
    // error is printed and the response is a bare 500 status.
    func ListProject(w http.ResponseWriter, r *http.Request) {
        ctx := context.Background()
        svc, err := cloudresourcemanager.NewService(ctx)
        if err != nil {
            fmt.Println(err)
            w.WriteHeader(http.StatusInternalServerError)
            return
        }

        svcCompute, err := compute.NewService(ctx)
        if err != nil {
            fmt.Println(err)
            w.WriteHeader(http.StatusInternalServerError)
            return
        }

        // The project listing is paginated: a single Do() only returns the first
        // page, which silently truncates large organizations. Walk every page.
        var projects []*cloudresourcemanager.Project
        err = svc.Projects.List().Pages(ctx, func(page *cloudresourcemanager.ListProjectsResponse) error {
            projects = append(projects, page.Projects...)
            return nil
        })
        if err != nil {
            fmt.Println(err)
            w.WriteHeader(http.StatusInternalServerError)
            return
        }

        waitGroup := &sync.WaitGroup{}
        c := make(chan line)

        // One worker per project; the workers register their own per-zone
        // goroutines on the same WaitGroup.
        waitGroup.Add(len(projects))
        for _, prj := range projects {
            go listInstances(*prj, svcCompute, c, waitGroup)
        }

        // Every producer sends its rows before calling Done, so once Wait
        // returns nothing can send on c any more and it is safe to close it.
        // Closing ends the range loop below.
        go func() {
            waitGroup.Wait()
            close(c)
        }()

        fmt.Fprint(w, "name,organization,space,zone,machine_type,preemptible,internal_ip,external_ip,status,service_account,project_id\n")

        for l := range c {
            fmt.Fprintf(w, "%s,%s,%s,%s,%s,%t,%s,%s,%s,%s,%s\n", l.projectName, l.organization, l.space, l.zone, l.machineType, l.preemtible, l.internalIps, l.externalIps, l.status, l.serviceAccount, l.projectId)
        }
    }
    
    // listInstances lists the zones visible to project prj and starts one
    // listInstanceInZone goroutine per zone.
    //
    // group must already account for this call (it is released on return); the
    // per-zone goroutines are added to the same group before they are started.
    // When the zone listing fails the error is printed together with the
    // project ID and the project is skipped.
    func listInstances(prj cloudresourcemanager.Project, svcCompute *compute.Service, c chan line, group *sync.WaitGroup) {
        defer group.Done()

        // Walk every result page rather than only the first one.
        var zones []*compute.Zone
        err := svcCompute.Zones.List(prj.ProjectId).Pages(context.Background(), func(page *compute.ZoneList) error {
            zones = append(zones, page.Items...)
            return nil
        })
        if err != nil {
            // Many projects are processed concurrently, so name the project the
            // error belongs to.
            fmt.Printf("listing zones of project %s: %v\n", prj.ProjectId, err)
            return
        }

        // Add before this function's deferred Done runs, so the counter cannot
        // drop to zero while zone goroutines are still being started.
        group.Add(len(zones))

        for _, zone := range zones {
            go listInstanceInZone(*zone, prj, svcCompute, c, group)
        }
    }
    
    // listInstanceInZone sends one line on c for every instance of project prj
    // located in zone, then releases its slot in group.
    //
    // All result pages are read. When the listing fails the error is printed
    // together with the project ID and zone name; rows already sent are kept.
    func listInstanceInZone(zone compute.Zone, prj cloudresourcemanager.Project, svcCompute *compute.Service, c chan line, group *sync.WaitGroup) {
        defer group.Done()

        err := svcCompute.Instances.List(prj.ProjectId, zone.Name).Pages(context.Background(), func(page *compute.InstanceList) error {
            for _, inst := range page.Items {
                c <- newLine(inst, zone, prj)
            }
            return nil
        })
        if err != nil {
            fmt.Printf("listing instances of project %s in zone %s: %v\n", prj.ProjectId, zone.Name, err)
        }
    }

    // newLine flattens one instance, plus the zone and project it belongs to,
    // into a CSV row.
    func newLine(inst *compute.Instance, zone compute.Zone, prj cloudresourcemanager.Project) line {
        // Collect only non-empty addresses so the joined fields never contain
        // empty entries (an access config without a NAT IP has no external
        // address).
        var internalIps, externalIps []string
        for _, n := range inst.NetworkInterfaces {
            if n.NetworkIP != "" {
                internalIps = append(internalIps, n.NetworkIP)
            }
            for _, a := range n.AccessConfigs {
                if a.NatIP != "" {
                    externalIps = append(externalIps, a.NatIP)
                }
            }
        }

        var sacEmails []string
        for _, s := range inst.ServiceAccounts {
            sacEmails = append(sacEmails, s.Email)
        }

        // MachineType is a resource URL; keep only the segment after the last
        // "/". When there is no "/", LastIndex returns -1 and the whole string
        // is kept.
        machineType := inst.MachineType[strings.LastIndex(inst.MachineType, "/")+1:]

        return line{
            // The first CSV column is headed "name" and identifies the instance,
            // so it carries the instance name; the project is identified by the
            // projectId column.
            projectName: inst.Name,
            projectId:   prj.ProjectId,
            // Label keys are the bare label names.
            organization: prj.Labels["organization"],
            space:        prj.Labels["space"],
            zone:         zone.Name,
            machineType:  machineType,
            // Scheduling is a pointer; treat a missing value as "not preemptible".
            preemtible:     inst.Scheduling != nil && inst.Scheduling.Preemptible,
            internalIps:    strings.Join(internalIps, listSeparator),
            externalIps:    strings.Join(externalIps, listSeparator),
            status:         inst.Status,
            serviceAccount: strings.Join(sacEmails, listSeparator),
        }
    }
    
    // line is one row of the CSV report: a single instance together with the
    // project and zone it was listed under. ListProject prints the fields in
    // the order given by the CSV header, not in declaration order.
    type line struct {
        // projectName is printed as the first CSV column (header "name").
        projectName    string
        // projectId is printed as the last CSV column (header "project_id").
        projectId      string
        // organization and space are taken from the project's labels.
        organization   string
        space          string
        // zone is the name of the zone the instance was listed in.
        zone           string
        // machineType is the last path segment of the instance's machine type.
        machineType    string
        // preemtible (sic) mirrors the instance's scheduling "preemptible" flag.
        preemtible     bool
        // internalIps and externalIps hold the addresses of all network
        // interfaces, joined with listSeparator.
        internalIps    string
        externalIps    string
        // status is the instance status as reported by the API.
        status         string
        // serviceAccount holds the service-account e-mails, joined with
        // listSeparator.
        serviceAccount string
    }
    

    There may be a more elegant way to do this, and there may be some issues in the expected results, but it runs in a few seconds instead of a few minutes (the time it takes without concurrency).