I am trying to scrape the names of the top products from an e-commerce site, but the resulting CSV file is empty apart from the header, and I would like to know what I am missing. The program's output is: Visiting: https://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_nav_0/ End of scraping: https://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_nav_0/
code:
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"github.com/gocolly/colly"
)
func main() {
fetchURL := "https://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_nav_0/"
fileName := "results.csv"
file, err := os.Create(fileName)
if err != nil {
log.Fatal("ERROR: Could not create file %q: %s\n", fileName, err)
return
}
defer file.Close()
writer := csv.NewWriter(file)
defer writer.Flush()
writer.Write([]string{"Sl. No."})
c := colly.NewCollector()
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting: ", r.URL)
})
c.OnHTML(`.a-section a-spacing-none aok-relative`, func(e *colly.HTMLElement) {
number := e.ChildText(".zg-badge-text")
name := e.ChildText(".p13n-sc-truncated")
writer.Write([]string{
number,
name,
})
c.Visit(fetchURL)
fmt.Println("End of scraping: ", fetchURL)
}
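You need to set the User-Agent header for the site to return data. Note also that the selector `.a-section a-spacing-none aok-relative` separates the class names with spaces, so it is read as nested element selectors rather than as one element with three classes (that would be `.a-section.a-spacing-none.aok-relative`), and that the OnHTML callback is missing its closing `})`. In addition, p13n-sc-truncated seems to be a generated class name, so it is not a reliable thing to select on.
You can use the following, for example: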
package main
import (
"log"
"strings"
"github.com/gocolly/colly"
)
// AmazonData holds one scraped bestseller entry.
type AmazonData struct {
	// Index is the running counter value assigned when the entry is collected.
	Index int
	// Link is the href of the entry's anchor; Title is the trimmed text of
	// the div elements inside that anchor.
	Link, Title string
}
// main fetches the Amazon India electronics bestseller page, collects the
// link and title of every listed item, and logs the collected slice.
func main() {
	const bestsellersURL = "https://www.amazon.in/gp/bestsellers/electronics/ref=zg_bs_nav_0/"

	c := colly.NewCollector()

	var data []AmazonData
	count := 1

	c.OnHTML(`#zg-ordered-list`, func(e *colly.HTMLElement) {
		e.ForEach("li .zg-item", func(_ int, elem *colly.HTMLElement) {
			link := elem.DOM.Find("a")
			// The "exists" result is deliberately ignored: an item without
			// an href is still recorded, with an empty Link.
			linkHref, _ := link.Attr("href")
			data = append(data, AmazonData{
				Index: count,
				Link:  linkHref,
				Title: strings.TrimSpace(link.Find("div").Text()),
			})
			count++
		})
		log.Println(data)
	})

	c.OnRequest(func(r *colly.Request) {
		// Send a browser-like User-Agent header with every request.
		r.Headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36")
	})

	// Report a failed visit instead of exiting silently with no output.
	if err := c.Visit(bestsellersURL); err != nil {
		log.Fatalf("visiting %s: %v", bestsellersURL, err)
	}
}