#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Scrape the listings on the esf.sh.fang.com front page into house.xlsx.

The script fetches the listing index, follows the link of every entry under
``.houseList dl``, extracts the fields of each detail page with
``gethousedetail`` and writes all collected rows to an Excel workbook.
"""
import requests
from bs4 import BeautifulSoup
import pandas

# Seconds to wait for the server. requests applies no timeout unless one is
# given, so without this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def gethousedetail(url, timeout=REQUEST_TIMEOUT):
    """Fetch one listing detail page and return its fields as a dict.

    Args:
        url: Address of the detail page to download.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        A dict with the keys ``'title'`` (stripped text of the first
        ``<title>`` element) and ``'price'`` (stripped text of the first
        element matching ``.zongjia1 .red20b``), plus one entry for every
        ``<dd>`` element whose stripped text contains ``':'``. For those
        entries the key is the text before the first colon and the value is
        everything after it.

    Raises:
        IndexError: If the page has no ``<title>`` element or no element
            matching ``.zongjia1 .red20b``.
    """
    info = {}
    res = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(res.text, 'html.parser')
    info['title'] = soup.select('title')[0].text.strip()
    info['price'] = soup.select('.zongjia1 .red20b')[0].text.strip()
    for dd in soup.select('dd'):
        text = dd.text.strip()
        if ':' in text:
            # Split on the first colon only: a value that itself contains a
            # colon must stay intact instead of breaking the two-name
            # unpacking with a ValueError.
            k, v = text.split(':', 1)
            info[k] = v
    return info


res = requests.get('http://esf.sh.fang.com/', timeout=REQUEST_TIMEOUT)
domain = 'http://esf.sh.fang.com'
soup = BeautifulSoup(res.text, 'html.parser')
houseary = []
for house in soup.select('.houseList dl'):
    # The href of the title link is appended to the site root to form the
    # address of the detail page.
    urls = domain + house.select('.title a')[0]['href']
    houseary.append(gethousedetail(urls))

# One row per listing; columns are the union of keys across all detail dicts.
df = pandas.DataFrame(houseary)
df.to_excel('house.xlsx', index=False)