I am able to set the HTTP headers and params, but I am unable to get a response object back. The site is https://www.sacmembership.ca/Search/Search.aspx and I am looking to scrape the details for each practitioner. Here is the code I have so far:
# Python 2 imports (cookielib/urllib2 were merged into http.cookiejar/urllib in Python 3)
import cookielib
import urllib
import urllib2
url = 'https://www.sacmembership.ca/Search/Search.aspx'
http_header = {
    "Host": "www.sacmembership.ca",
    "Connection": "keep-alive",
    # Content-Length is computed by urllib2 from the encoded body;
    # hard-coding a stale value (16581) can break the POST.
    "Cache-Control": "max-age=0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Origin": "https://www.sacmembership.ca",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36",
    "Content-Type": "application/x-www-form-urlencoded",
    "Referer": "https://www.sacmembership.ca/Search/Search.aspx",
    # Accept-Encoding dropped: urllib2 does not transparently decompress
    # gzip, so requesting it would leave res.read() as compressed bytes.
    "Accept-Language": "en-US,en;q=0.8"
}
params = {
    # NOTE: ASP.NET WebForms pages also post hidden __VIEWSTATE /
    # __EVENTVALIDATION fields; they are missing here (see the sketch below).
    'ctl00$ContentPlaceHolder1$ddlProfession': "",
    'ctl00$ContentPlaceHolder1$ddlFacility': "",
    'ctl00$ContentPlaceHolder1$txtCity': "",
    'ctl00$ContentPlaceHolder1$ddlProvince': "",
    'ctl00$ContentPlaceHolder1$ddlSortBy': "LastName",
    'ctl00$ContentPlaceHolder1$ddlLanguageOfPractice': "",
    'ctl00$ContentPlaceHolder1$txtEmployerCompanyName': "",
    'ctl00$ContentPlaceHolder1$txtFirstName': "",
    'ctl00$ContentPlaceHolder1$txtLastName': "",
    'ctl00$ContentPlaceHolder1$btnSearch': "Search"
}
cookie_jar = cookielib.LWPCookieJar()
cookie = urllib2.HTTPCookieProcessor(cookie_jar)
opener = urllib2.build_opener(cookie)
req = urllib2.Request(url, urllib.urlencode(params), http_header)
res = opener.open(req)
html = res.read()
print html
"""
open("tmp.html", "w").write(html)
body = html
"""
Please help me with this.
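One likely culprit, for anyone hitting the same wall: ASP.NET WebForms validates the hidden __VIEWSTATE / __VIEWSTATEGENERATOR / __EVENTVALIDATION inputs on every postback, and the params dict above never sends them, so the server simply re-renders the search page. Below is a minimal sketch of harvesting those fields with a first GET before posting; the field names are the standard WebForms ones, but the crude regex is an assumption and should be verified against the real page source.

import re
import urllib
import urllib2
import cookielib

url = 'https://www.sacmembership.ca/Search/Search.aspx'
jar = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))

# 1) GET the form once so the server hands out the session cookie
#    and the hidden WebForms state fields.
page = opener.open(url).read()

def hidden_field(name, html):
    # Crude extraction of <input ... id="NAME" value="...">; assumes the
    # attributes appear in this order (a real HTML parser is safer).
    match = re.search(r'id="%s" value="([^"]*)"' % name, html)
    return match.group(1) if match else ''

post_params = {
    '__VIEWSTATE': hidden_field('__VIEWSTATE', page),
    '__VIEWSTATEGENERATOR': hidden_field('__VIEWSTATEGENERATOR', page),
    '__EVENTVALIDATION': hidden_field('__EVENTVALIDATION', page),
    'ctl00$ContentPlaceHolder1$ddlSortBy': 'LastName',
    'ctl00$ContentPlaceHolder1$btnSearch': 'Search',
    # ...plus the empty filter fields from the params dict above...
}

# 2) POST the search with the harvested state included.
req = urllib2.Request(url, urllib.urlencode(post_params))
results_html = opener.open(req).read()

The same cookie-carrying opener must be used for both requests so the ASP.NET session survives between the GET and the POST.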
I was able to achieve what I was looking for using Selenium:
from selenium import webdriver
from scrapy import Selector as s
import time
driver = webdriver.Firefox()
links = ['', '', '', '', '']  # the five listing-page URLs (left blank here)

for each in links:
    driver.get(each)
    time.sleep(2)
    driver.find_element_by_id("showAll").click()
    time.sleep(4)
    source = driver.page_source
    sel = s(text=source, type="html")
    apartment_listing = sel.xpath('//section[@class="placardHeader"]//a[@class="placardTitle"]//@href').extract()
    with open(r"C:\Users\ssamant\Desktop\Client\Anida\Phase_II\Apartments\apartment_listing.csv", "ab") as export:
        for each1 in apartment_listing:
            export.write('{}\n'.format(each1))
    # New_link = driver.current_url
    i = 0
    while i < 21:  # walk through the next 21 result pages
        driver.find_element_by_class_name('next').click()
        time.sleep(2)
        source1 = driver.page_source
        sel1 = s(text=source1, type="html")
        # parse the fresh page source (sel1), not the first page (sel)
        apartment_listing1 = sel1.xpath('//section[@class="placardHeader"]//a[@class="placardTitle"]//@href').extract()
        with open(r"C:\Users\ssamant\Desktop\Client\Anida\Phase_II\Apartments\apartment_listing.csv", "ab") as export:
            for each2 in apartment_listing1:
                export.write('{}\n'.format(each2))
        i += 1
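One refinement worth noting: the fixed time.sleep() calls are fragile (too short on a slow page, wasted time on a fast one). Selenium's explicit waits block only until the element is actually usable. A small sketch using the same ids/classes as above, with an assumed 10-second timeout:

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

wait = WebDriverWait(driver, 10)  # give each lookup up to 10 seconds

# replaces time.sleep(2); returns as soon as the button is clickable
wait.until(EC.element_to_be_clickable((By.ID, "showAll"))).click()

# and inside the pagination loop:
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "next"))).click()

If the element never appears, the wait raises a TimeoutException instead of silently clicking a stale page, which makes failures much easier to debug.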