Web_scraping_using_Beautiful_soup
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup
import csv
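
# Note: Amazon often serves a robot-check page to the default requests
# User-Agent, which leaves the selectors below with nothing to match.
# A minimal sketch of passing a browser-like header (the header string is
# an assumption, not part of the original script):
#
#   headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
#   req = requests.get(url, headers=headers)
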
# Returns a list of all the genres listed on the bestsellers page.
def all_topics():
    alltopics = []
    url = "https://www.amazon.in/gp/bestsellers/books/ref=zg_bs_pg_1?ie=UTF8&pg=1"
    req = requests.get(url)
    soup = BeautifulSoup(req.content, "html.parser")
    for d in soup.findAll('div', attrs={'class': "a-fixed-left-grid-col a-col-left"}):
        d = d.find('ul').find('ul')
        for tag in d.find_all(re.compile("^a")):
            alltopics.append(str(tag.string))
    return alltopics
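
# A minimal usage sketch for all_topics(), assuming network access to the
# live page (the genre list varies over time, so no output is shown):
#
#   genres = all_topics()
#   print(len(genres), "genres found:", genres)
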
# Scrapes the bestseller list and returns one tuple per book, with a
# header tuple first.
def book_details():
    names = ["BOOKS"]
    ratings = ["RATINGS"]
    authors = ["AUTHORS"]
    prices = ["PRICE"]
    positions = ["POPULARITY"]
    no_of_reviews = ["NUMBER OF REVIEWS"]
    image_urls = ["LINKS TO THE COVER OF THE BOOK"]
    # The top 100 books are spread across two pages; both are scraped.
    pgNo = 1
    while pgNo < 3:
        url = f"https://www.amazon.in/gp/bestsellers/books/ref=zg_bs_pg_{pgNo}?ie=UTF8&pg={pgNo}"
        pgNo += 1
        req = requests.get(url)
        soup = BeautifulSoup(req.content, "html.parser")
        # Each book sits in an <li> of the ordered bestseller list.
        for d in soup.findAll('ol', attrs={'class': "a-ordered-list a-vertical", "id": "zg-ordered-list", "role": "grid"}):
            for tag in d.find_all(re.compile("^li")):
                bookname = tag.find('div', attrs={"aria-hidden": "true", "class": "p13n-sc-truncate p13n-sc-line-clamp-1", "data-rows": "1"})
                rating = tag.find('span', attrs={"class": "a-icon-alt"})
                price = tag.find('span', attrs={"class": 'p13n-sc-price'})
                author = tag.find('a', attrs={"class": "a-size-small a-link-child"})
                position = tag.find('span', attrs={"class": 'zg-badge-text'})
                review = tag.find('a', attrs={"class": "a-size-small a-link-normal"})
                names.append(str(bookname.string).lstrip(' ').replace('\n', ''))
                positions.append(position.string)
                # Fields that may be absent fall back to "NA".
                img_url = tag.find('div', attrs={"class": "a-section a-spacing-small"})
                if img_url.find('img') is None:
                    image_urls.append("NA")
                else:
                    image_urls.append(img_url.find('img').get('src'))
                if review is None:
                    no_of_reviews.append("NA")
                else:
                    no_of_reviews.append(str(review.text).replace('\xa0', ""))
                if price is None:
                    prices.append("NA")
                else:
                    # Drop the leading currency symbol.
                    prices.append(str(price.string)[1:].replace('\xa0', ""))
                if author is None:
                    authors.append("NA")
                else:
                    authors.append(author.string)
                if rating is None:
                    ratings.append("NA")
                else:
                    ratings.append(str(rating.text).replace('\xa0', ""))
    # Strip any remaining stray whitespace from the book names.
    names = [name.strip(' ') for name in names]
    details = list(zip(positions, names, authors, ratings, prices, no_of_reviews, image_urls))
    return details
# The contents are written into a CSV file; utf-8 avoids encoding errors
# on non-ASCII titles.
with open('Top100books.csv', 'w', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator='\n')
    for tup in book_details():
        writer.writerow(tup)
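
# Quick sanity check: read the CSV back with pandas (this is what the
# pandas import above is used for; a sketch, assuming the scrape above
# succeeded). The first tuple written acts as the header row.
df = pd.read_csv('Top100books.csv')
print(df.head())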