Python skripta

Python skripta

offline
  • Pridružio: 18 Jul 2003
  • Poruke: 4194
  • Gde živiš: U zlatnom kavezu

Imam Python skriptu koja ne radi sasvim ok, a ja ne mogu da joj nađem grešku. Ona treba da generiše selektovane recenzije hotela u csv fajl i ona to uradi, ali je problem što bez obzira na broj selekcija ona obradi samo pet. Ako neko zna, neka pomogne; prilažem kod.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape TripAdvisor hotel reviews into a CSV file on the Desktop.

For every start URL in ``WebSites`` the script downloads the page, extracts
one CSV row per review, and then follows the "next" pagination link until no
such link is left.
"""

import csv
import datetime
import os
import re
import sys
import urllib
import urllib.request

# Destination of the generated report.
OUTPUT_PATH = os.path.expanduser(r"~/Desktop/TripAdviser Reviews.csv")

# Column order of the report; every data row has exactly these 12 fields.
# "Review Count" and "Rating" are not extracted yet and are written empty.
CSV_HEADER = [
    "Organization", "Address", "Reviewer", "Review Title", "Review",
    "Review Count", "Help Count", "Attraction Count", "Restaurant Count",
    "Hotel Count", "Rating Date", "Rating",
]

# List the first page of the reviews (ends with "#REVIEWS"), one per entry.
WebSites = [
    "https://www.tripadvisor.com/Hotel_Review-g294472-d7181993-Reviews-Holiday_Inn_Express_Belgrade_City-Belgrade.html"
]

# Pagination hrefs are site-relative, so they are appended to this prefix.
BASE_URL = 'http://www.tripadvisor.com'

# Words searched for in a member badge, in CSV column order:
# help count, attraction count, restaurant count, hotel count.
BADGE_MARKERS = ("helpful", "attraction", "REVIEWS_RESTAURANTS", "REVIEWS_HOTELS")

# Text nodes of a badge are glued together with this separator so that a
# number is only paired with a marker from the same text node.
NODE_SEPARATOR = "|||||"

# A number (optionally with thousands separators) at the very end of a string.
_TRAILING_NUMBER = re.compile(r"(\d[\d,]*)\s*$")


def extract_count(badge_text, marker):
    """Return the number written directly before *marker* in *badge_text*.

    Returns the digits as a string (thousands separators removed), or "0"
    when the marker is absent or is not preceded by a number.
    """
    position = badge_text.find(marker)
    if position < 0:
        return "0"
    match = _TRAILING_NUMBER.search(badge_text[:position])
    if match is None:
        return "0"
    return match.group(1).replace(",", "")


def counts_from_badge_text(badge_text):
    """Return the (help, attraction, restaurant, hotel) counts of one badge."""
    return tuple(extract_count(badge_text, marker) for marker in BADGE_MARKERS)


def tidy_name(text):
    """Drop commas and straight/curly double quotes from a short text field."""
    return (text.replace(',', ' ').replace('”', '').replace('“', '')
            .replace('"', '').strip())


def build_rows(organization, address, reviewers, titles, reviews, dates,
               badge_counts):
    """Combine the per-review lists of one page into CSV rows.

    The lists are matched by position; the shortest list decides how many
    rows are produced, so a partially parsed page never raises IndexError.
    """
    rows = []
    for reviewer, title, review, date, counts in zip(
            reviewers, titles, reviews, dates, badge_counts):
        help_count, attraction_count, restaurant_count, hotel_count = counts
        rows.append([
            organization, address, reviewer, title, review,
            "",  # Review Count: not extracted
            help_count, attraction_count, restaurant_count, hotel_count,
            date,
            "",  # Rating: not extracted
        ])
    return rows


def fetch_soup(url):
    """Download *url* and return it parsed with BeautifulSoup."""
    # Imported here so the pure text helpers above stay importable on a
    # machine where the third-party bs4 package is not installed.
    from bs4 import BeautifulSoup

    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, 'html.parser')


def _texts(soup, css_class):
    """Return the text of every element whose class attribute is *css_class*."""
    return [tag.text for tag in soup.find_all(attrs={"class": css_class})]


def parse_badge_counts(soup):
    """Return one tuple of counts per member badge found on the page."""
    all_counts = []
    for profile in soup.find_all(attrs={"class": "memberBadgingNoText"}):
        badge_text = '\n'.join(profile.find_all(text=True))
        badge_text = badge_text.replace("\n", NODE_SEPARATOR).strip()
        all_counts.append(counts_from_badge_text(badge_text))
    return all_counts


def parse_page(soup):
    """Return ``(organization, rows)`` for one review page.

    Raises AttributeError/IndexError when the page has no heading or no
    address element, exactly as the lookups themselves do.
    """
    organization = soup.find(attrs={'class': 'heading_title'}).text
    organization = organization.replace('"', ' ').replace('Review of', ' ').strip()
    address = soup.find_all(attrs={'class': 'address_search'})[0].text
    address = address.replace(',', '').replace('\n', '').strip()

    # Commas are still replaced (although csv.writer would quote them) so the
    # field contents stay the same as in earlier reports.
    reviewers = [tidy_name(text) for text in _texts(soup, "username mo")]
    titles = [tidy_name(text).replace('é', 'e').strip()
              for text in _texts(soup, "quote")]
    reviews = [text.replace(',', ' ').replace('\n', ' ').strip()
               for text in _texts(soup, "entry")]
    dates = [text.replace('Reviewed', ' ').replace('NEW', ' ')
             .replace(',', ' ').strip()
             for text in _texts(soup, "ratingDate")]

    rows = build_rows(organization, address, reviewers, titles, reviews,
                      dates, parse_badge_counts(soup))
    return organization, rows


def scrape_site(url, writer):
    """Write the reviews of *url* and of all its following pages to *writer*."""
    soup = fetch_soup(url)
    # Reset for every site so one site's last link cannot mute the next site.
    checker = "REVIEWS"
    visited = set()
    while True:
        organization, rows = parse_page(soup)
        # NOTE(review): a page is only written when the link that led to it
        # ends in "REVIEWS"; if the site's pagination hrefs do not end that
        # way, every page after the first is skipped - confirm this gate is
        # still wanted.
        if checker == "REVIEWS":
            writer.writerows(rows)

        links = soup.find_all(attrs={"class": "nav next taLnk"})
        print(organization)
        if not links:
            break
        href = links[0].get('href')
        # Stop on a link without target or one already followed (endless loop).
        if not href or href in visited:
            break
        visited.add(href)
        soup = fetch_soup(BASE_URL + href)
        print(href)
        checker = href[-7:]


def main():
    """Create the CSV report for every site listed in ``WebSites``."""
    # ASCII with errors="ignore" keeps the file byte-compatible with earlier
    # reports: characters outside ASCII are dropped.
    with open(OUTPUT_PATH, "w", newline="", encoding="ascii",
              errors="ignore") as report:
        writer = csv.writer(report, lineterminator="\n")
        writer.writerow(CSV_HEADER)
        for theurl in WebSites:
            scrape_site(theurl, writer)


if __name__ == "__main__":
    main()



Registruj se da bi učestvovao u diskusiji. Registrovanim korisnicima se NE prikazuju reklame unutar poruka.
Ko je trenutno na forumu
 

Ukupno su 716 korisnika na forumu :: 31 registrovanih, 1 sakriven i 684 gosta   ::   [ Administrator ] [ Supermoderator ] [ Moderator ] :: Detaljnije

Najviše korisnika na forumu ikad bilo je 3028 - dana 22 Nov 2019 07:47

Korisnici koji su trenutno na forumu:
Korisnici trenutno na forumu: _commandos_, _Sale, A.R.Chafee.Jr., aramis s, Drug pukovnik, galijot, gorangogs88, goxin, havoc995, Insan2, Jozo Mrak, kovac9mm, krunomiletic5, Kubovac, Marko Marković, Markobg, Markoni958, Mercury2, mika 001, Milan A. Nikolic, Miskohd, mnn2, mushroom2, nenad812, rovac, shaja1, stoj.milovan, t.mile, VJ, VP3987, yufighter