企业邮箱怎么进去_手机软件怎么开发_免费自助建站行吗_html设计一个简单的网页

企业所得税怎么算税率

电子商务网站建设规划书范文、办办网 写字楼、永久使用、不限域名、室内装修设计软件cad

深圳市住房建设局官网?

企业邮箱怎么进去_手机软件怎么开发_免费自助建站行吗_html设计一个简单的网页

# Module-level proxy list; nothing in the visible code reads or fills it.
proxies = []


def write_to_mongo(ips, city):
    """Write the scraped listings to MongoDB.

    Each element of *ips* is a 5-item sequence
    ``[name, price, address, area, eq]``; one document per element is
    inserted into the ``<city>_good`` collection of the ``fs_db`` database.

    NOTE(review): ``Client`` is presumably ``pymongo.MongoClient`` imported
    under an alias elsewhere in the file -- confirm.
    """
    client = Client(host='localhost', port=27017)
    try:
        coll = client['fs_db'][city + '_good']
        for ip in ips:
            coll.insert_one({
                'name': ip[0],
                'price': ip[1],
                'addresses': ip[2],
                'areas': ip[3],
                'eq': ip[4],
            })
    finally:
        # Release the connection even when an insert fails.
        client.close()


def read_from_mongo(city):
    """Return every document stored in the ``<city>_good`` collection.

    The result is materialised into a list *before* the client is closed:
    ``find()`` only builds a lazy cursor, so returning the cursor itself
    would defer the query until after the connection is gone.
    """
    client = Client(host='localhost', port=27017)
    try:
        coll = client['fs_db'][city + '_good']
        return list(coll.find())
    finally:
        client.close()


class Consumer(threading.Thread):
    """Worker thread that scrapes one listing page and stores the result.

    *args* is the tuple ``(url_demo, page, city_id, lock)``:
    a URL template with one ``{}`` placeholder for the page number, the page
    number to fetch, the index into the module-level ``city`` / ``e_city``
    sequences, and a lock that serialises the database writes.
    """

    def __init__(self, args):
        threading.Thread.__init__(self, args=args)

    def run(self):
        url_demo, i, city_id, lock = self._args
        print("{}, 第{}页".format(city[city_id], i))
        soup = get_real(url_demo.format(i))

        names = [tag.text.strip() for tag in soup.select('.tit_shop')]

        addresses = []
        for item in soup.find_all('p', attrs={'class': 'add_shop'}):
            address = item.a.text + " " + item.span.text
            addresses.append(address.replace('\t', '').replace('\n', ''))

        es = [
            item.text.replace('\t', '').replace('\n', '')
            for item in soup.find_all('p', attrs={'class': 'tel_shop'})
        ]
        moneys = [
            money.text.strip()
            for money in soup.find_all("span", attrs={"class": 'red'})
        ]
        areas = [
            area.find_all('span')[-1].text
            for area in soup.find_all('dd', attrs={'class': 'price_right'})
        ]

        houses = []
        for idx in range(len(names)):
            # The five lists are scraped independently and may differ in
            # length; a row with a missing field is reported and skipped.
            try:
                item = [names[idx], moneys[idx], addresses[idx],
                        areas[idx], es[idx]]
            except IndexError as e:
                print(e)
                continue
            print(item)
            houses.append(item)

        # ``with`` guarantees the lock is released even if the write raises,
        # so one failing worker cannot deadlock the others.
        with lock:
            write_to_mongo(houses, e_city[city_id])
        print("线程结束{}".format(i))


def dict2proxy(dic):
    """Build a ``requests``-style proxies mapping from a proxy record.

    *dic* must provide string ``'type'`` and ``'ip'`` entries and a
    ``'port'`` entry; the same ``type://ip:port`` URL is used for both the
    ``http`` and ``https`` keys.
    """
    s = dic['type'] + '://' + dic['ip'] + ':' + str(dic['port'])
    return {'http': s, 'https': s}


def get_real(url):
    """Fetch *url* and return the parsed page, following the JS redirect.

    When the page title is the redirect marker, the real address is rebuilt
    from the ``t4`` (base URL) and ``t3`` (query string) variables embedded
    in the page's script and fetched again. The verification page is
    returned unchanged.
    """
    resp = requests.get(url, headers=header)
    soup = BeautifulSoup(resp.content, 'html.parser', from_encoding='gb18030')
    title = soup.find('title').text.strip()
    if title == '跳转...':
        pattern1 = re.compile(r"var t4='(.*?)';")
        script = soup.find("script", text=pattern1)
        t4 = pattern1.search(str(script)).group(1)
        pattern1 = re.compile(r"var t3='(.*?)';")
        script = soup.find("script", text=pattern1)
        # The second-to-last match is the one used by the original code.
        t3 = re.findall(pattern1, str(script))[-2]
        url = t4 + '?' + t3
        HTML = requests.get(url, headers=header)
        soup = BeautifulSoup(HTML.content, 'html.parser',
                             from_encoding='gb18030')
    elif title == '访问验证-房天下':
        # Verification (anti-bot) page: nothing is done about it here; the
        # caller receives the verification page as-is.
        pass
    return soup


def read_proxies():
    """Return all documents of ``proxies_db.proxies`` as a list."""
    client = Client(host='localhost', port=27017)
    try:
        coll = client['proxies_db']['proxies']
        # Original note: "check first, then write, to prevent duplicates".
        # NOTE(review): no check or write happens in this function -- the
        # note probably belongs to a writer defined elsewhere; confirm.
        return list(coll.find())
    finally:
        client.close()


def craw():
    """Scrape every city's listing pages, ten worker threads at a time.

    For each entry of ``e_city`` the page count is read from page 2 of the
    matching ``eshouse`` URL template; if it cannot be parsed, pages 1-100
    are assumed. Pages are handed to ``Consumer`` threads in batches of at
    most ten, and each batch is fully joined before the next one starts.
    """
    lock = threading.Lock()
    for idx in trange(len(e_city)):
        url = eshouse[idx]
        soup = get_real(url.format(2))
        try:
            page_number = int(
                soup.find('div', attrs={'class': 'page_al'})
                .find_all('span')[-1].text[1:-1]
            )
            pages = list(range(1, page_number + 1))
        except (AttributeError, IndexError, ValueError):
            # Pager missing or not numeric: fall back to a fixed range.
            pages = list(range(1, 101))
        url_demo = url
        while pages:
            batch = []
            for _ in range(10):
                t = Consumer((url_demo, pages.pop(), idx, lock))
                t.start()
                batch.append(t)
                if not pages:
                    break
            # Join every thread of the batch. Removing items from the list
            # while iterating it would skip every other thread.
            for t in batch:
                t.join()


if __name__ == '__main__':
    craw()

猜你喜欢

  • 友情链接:
  • 济南公司建网站公司排行榜 软件设计师中级报名时间 海口疫情最新通知 国家企业系统信息查询 泰安人才网司机招聘 四川省成都市锦江区疫情最新情况