Python: looking up a specific key in a dictionary


I have code that scrapes a specific web page, and I want to build on it so that I can look up specific details. For example, if I enter a style ID, it should give me the details related to it; or if I enter a category, it should give me the details of the items in that category. My code is:

# Scrape a single Barneys product page and print its category, brand,
# breadcrumb, product name, details, style id, recommended product ids,
# image URL, prices and the product ids of the other colour variants.
import re

import requests
from bs4 import BeautifulSoup

url = "http://www.barneys.com/theory-andrejs-sweater-503900006.html#start=2"
r = requests.get(url)
soup = BeautifulSoup(r.content)
links = soup.find_all("a")
img = soup.find(itemprop="image")

# Category: taken from the first men's-department link whose href contains
# "clothing/<category>". Printed once per breadcrumb list found on the page.
g_d4 = soup.find_all("ol", {"class": "breadcrumb"})
pattern_2 = re.compile(r"clothing/(\w+)")
for item in g_d4:
    links_2 = soup.find_all('a', href=re.compile('^http://www.barneys.com/barneys-new-york/men/'))
    for link in links_2:
        match_1 = pattern_2.search(link["href"])
        if match_1:
            print("category:- " + match_1.group(1))
            break

# Every field below is best-effort: a missing element must not abort the
# remaining fields. `except Exception` (rather than a bare `except`) keeps
# that behaviour while still letting Ctrl-C / SystemExit through.
g_d1 = soup.find_all("div", {"id": "product-content"})
for item in g_d1:
    try:
        print("\n\nbrand:-" + item.contents[1].text)
    except Exception:
        pass
    try:
        a_1 = item.find("ol", {"class": "breadcrumb"})
        a_2 = a_1.text
        print(a_2)
    except Exception:
        pass
    try:
        # The original passed a second dict ({"itemprop": "name"}) as the
        # third positional argument, which is `recursive`, not a filter; it
        # never restricted the match, so only the class filter is kept.
        print("type:-" + item.find("h1", {"class": "product-name"}).text + ';')
    except Exception:
        pass
    try:
        d2 = item.find("div", {"class": "panel-body standard-p"})
        d3 = d2.text
        # The style id is the first 9-digit number in the details text.
        p_id = re.findall(r'[0-9]{9}', d3)
        id_2 = p_id[0]
        # Recommendation service request; the style id is spliced in as `p`.
        url_1 = (
            'http://recs.richrelevance.com/rrserver/p13n_generated.js?a=dbeab3c977a08905&ts=1434386243747&p='
            + str(id_2)
            + '&pt=%7citem_page.rr1%7citem_page.featured_item_0%7citem_page.featured_item_1%7citem_page.featured_item_2%7citem_page.featured_item_3&u=mvbbr9wkg1pj7zehlfmnxwzrp4wgmedlg4m%3d&s=mvbbr9wkg1pj7zehlfmnxwzrp4wgmedlg4m%3d&cts=http%3a%2f%2fwww.barneys.com&chi=%7cmens-shirts-dress-classic&flv=18.0.0&rcs=ef4nyjeogcambdcfybs0obqfyg28bhrihnzu88v68sjxf881tdusq6hytimwomrgm9gkh9fpzo21oln3qbt3oguyocatzpgrp7a2emy&l=1'
        )
        r_1 = requests.get(url_1)
        # Recommended ids appear in the response as "p=<digits>&".
        pattern = re.compile(r'(?<=p=)[0-9]+(?=&)')
        product_ids = pattern.findall(str(r_1.content))
        print("details:- " + d3 + ';')
        print("\nstyle id:- " + id_2 + ';')
        print("\nrecommended product id's:- ")
        print(','.join(product_ids))
    except Exception:
        pass
    try:
        print("\nurl:-" + img["src"] + ';')
    except Exception:
        pass
    try:
        print("\nfull price:-" + item.find("span", {"class": "price-standard"}).text + ';')
    except Exception:
        pass
    try:
        print("\ndiscounted price:-" + item.find("span", {"class": "price-sales"}).text + ';')
    except Exception:
        pass

# Other colours: each product-variation link carries the variant id as
# "pid=<id>"; the first link is skipped, as in the original script.
g_d2 = soup.find_all("div", {"class": "color-scroll"})
pattern_1 = re.compile(r"pid=(\w+)")
for item in g_d2:
    links_1 = soup.find_all('a', href=re.compile('^/on/demandware.store/sites-bny-site/default/product-variation'))
    for link in links_1[1:]:
        match = pattern_1.search(link["href"])
        if match:
            print("\nproduct id of other color:-")
            print(match.group(1))

I added a dictionary called `d`:

# Scrape a single Barneys product page, print the extracted fields and
# collect them in the dictionary `d` so they can be looked up by key
# afterwards (e.g. d["brand"], d["style"], d["recommendedproductids"]).
import re

import requests
from bs4 import BeautifulSoup

d = {}

url = "http://www.barneys.com/theory-andrejs-sweater-503900006.html#start=2"
r = requests.get(url)
soup = BeautifulSoup(r.content)
links = soup.find_all("a")

# d["links"] is a list of (attribute name, values for every <a>) pairs.
d["links"] = []
d["links"].append(("href", [link.get("href") for link in links]))
d["links"].append(("class", [link.get("class") for link in links]))

img = soup.find(itemprop="image")
d["img"] = []
# d["img"] is a list, so read this entry as d["img"][0], not d["img"]["alt"].
# Skipped when the page has no itemprop="image" element, instead of crashing.
if img is not None:
    d["img"].append([
        ("alt", img.get("alt")),
        ("src", img.get("src")),
        ("itemprop", img.get("itemprop")),
        ("class", img.get("class")[0]),
    ])

# Category: taken from the first men's-department link whose href contains
# "clothing/<category>". Printed once per breadcrumb list found on the page.
g_d4 = soup.find_all("ol", {"class": "breadcrumb"})
pattern_2 = re.compile(r"clothing/(\w+)")
for item in g_d4:
    links_2 = soup.find_all('a', href=re.compile('^http://www.barneys.com/barneys-new-york/men/'))
    for link in links_2:
        match_1 = pattern_2.search(link["href"])
        if match_1:
            print("category:- " + match_1.group(1))
            break

# Every field below is best-effort: a missing element must not abort the
# remaining fields. `except Exception` (rather than a bare `except`) keeps
# that behaviour while still letting Ctrl-C / SystemExit through.
g_d1 = soup.find_all("div", {"id": "product-content"})
for item in g_d1:
    try:
        d["brand"] = item.contents[1].text
        print("\n\nbrand:-" + item.contents[1].text)
    except Exception:
        pass
    try:
        a_1 = item.find("ol", {"class": "breadcrumb"})
        a_2 = a_1.text
        d["a_2"] = a_2
        print(a_2)
    except Exception:
        pass
    try:
        # The original passed a second dict ({"itemprop": "name"}) as the
        # third positional argument, which is `recursive`, not a filter; it
        # never restricted the match, so only the class filter is kept.
        product_name = item.find("h1", {"class": "product-name"}).text
        print("type:-" + product_name + ';')
        d["type"] = product_name
    except Exception:
        pass
    try:
        d2 = item.find("div", {"class": "panel-body standard-p"})
        d3 = d2.text
        # The style id is the first 9-digit number in the details text.
        p_id = re.findall(r'[0-9]{9}', d3)
        id_2 = p_id[0]
        # Recommendation service request; the style id is spliced in as `p`.
        url_1 = (
            'http://recs.richrelevance.com/rrserver/p13n_generated.js?a=dbeab3c977a08905&ts=1434386243747&p='
            + str(id_2)
            + '&pt=%7citem_page.rr1%7citem_page.featured_item_0%7citem_page.featured_item_1%7citem_page.featured_item_2%7citem_page.featured_item_3&u=mvbbr9wkg1pj7zehlfmnxwzrp4wgmedlg4m%3d&s=mvbbr9wkg1pj7zehlfmnxwzrp4wgmedlg4m%3d&cts=http%3a%2f%2fwww.barneys.com&chi=%7cmens-shirts-dress-classic&flv=18.0.0&rcs=ef4nyjeogcambdcfybs0obqfyg28bhrihnzu88v68sjxf881tdusq6hytimwomrgm9gkh9fpzo21oln3qbt3oguyocatzpgrp7a2emy&l=1'
        )
        r_1 = requests.get(url_1)
        # Recommended ids appear in the response as "p=<digits>&".
        pattern = re.compile(r'(?<=p=)[0-9]+(?=&)')
        product_ids = pattern.findall(str(r_1.content))
        print("details:- " + d3 + ';')
        d["details"] = d3.split(",")
        print("\nstyle id:- " + id_2 + ';')
        d["style"] = ("id", id_2)
        print("\nrecommended product id's:- ")
        print(','.join(product_ids))
        d["recommendedproductids"] = list(product_ids)
    except Exception:
        pass
    try:
        print("\nurl:-" + img["src"] + ';')
    except Exception:
        pass
    try:
        print("\nfull price:-" + item.find("span", {"class": "price-standard"}).text + ';')
    except Exception:
        pass
    try:
        print("\ndiscounted price:-" + item.find("span", {"class": "price-sales"}).text + ';')
    except Exception:
        pass

# Other colours: each product-variation link carries the variant id as
# "pid=<id>"; the first link is skipped, as in the original script.
g_d2 = soup.find_all("div", {"class": "color-scroll"})
pattern_1 = re.compile(r"pid=(\w+)")
for item in g_d2:
    links_1 = soup.find_all('a', href=re.compile('^/on/demandware.store/sites-bny-site/default/product-variation'))
    for link in links_1[1:]:
        match = pattern_1.search(link["href"])
        if match:
            print("\nproduct id of other color:-")
            print(match.group(1))

Comments

Popular posts from this blog

javascript - Using jquery append to add option values into a select element not working -

Android soft keyboard reverts to default keyboard on orientation change -

Rendering JButton to get the JCheckBox behavior in a JTable by using images does not update my table -