Tuesday, January 5, 2021

Get a list of Google Search results in a CSV file using Python code

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from datetime import datetime
import csv
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Performing google search using Python code
class Gsearch_python:
   """Run a Google search and export the results to timestamped CSV files.

   Two files are written into ``output_dir``:

   * ``<timestamp> GoogleList.csv`` -- every URL returned by the search.
   * ``<timestamp> Listresult.csv`` -- the de-duplicated links found on those
     pages whose ``href`` contains both "contact" and "http".
   """

   # Folder the CSV files are written to when the caller does not pass one.
   DEFAULT_OUTPUT_DIR = r'F:\Technology\Python\DataScraping\Data'

   def __init__(self, name_search, output_dir=DEFAULT_OUTPUT_DIR, timeout=30):
      """Store the search settings.

      Args:
         name_search: Query string sent to Google.
         output_dir: Existing folder that receives the CSV files.
         timeout: Seconds to wait for each result page before giving up on it.
      """
      self.name = name_search
      self.output_dir = output_dir
      self.timeout = timeout

   @staticmethod
   def _is_contact_link(href):
      """Return True when *href* contains both "contact" and "http"."""
      return "contact" in href and "http" in href

   def _write_csv(self, rows, label):
      """Write *rows* as a single headerless column to '<timestamp> <label>.csv'."""
      date_time = datetime.now().strftime("%d%m%Y %H%M%S")
      FilePath = self.output_dir + '/' + date_time + ' ' + label + '.csv'
      pd.DataFrame(rows).to_csv(FilePath, index=False, header=False)

   def Gsearch(self):
      """Search Google for ``self.name`` and export result and contact links.

      Pages that cannot be downloaded are reported with "Connection Error"
      and skipped; the remaining pages are still processed.

      Raises:
         ImportError: If the ``googlesearch`` module is not installed.
      """
      try:
         from googlesearch import search
      except ImportError:
         print("No Module named 'google' Found")
         # Without the module there is nothing to search with; re-raise so the
         # caller sees the real cause instead of a later NameError on `search`.
         raise

      GoogleList = list(search(query=self.name, tld='co.in', lang='en',
                               num=10, stop=1000, pause=2))
      self._write_csv(GoogleList, 'GoogleList')

      Listresult = []
      for weburl in GoogleList:
         try:
            # A timeout keeps one unresponsive host from stalling the whole run.
            page = requests.get(weburl, timeout=self.timeout)
         except requests.exceptions.RequestException:
            print("Connection Error")
            continue
         soup = BeautifulSoup(page.content, 'html5lib')
         for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href and self._is_contact_link(href):
               Listresult.append(href)

      # dict.fromkeys drops duplicates while keeping first-seen order.
      self._write_csv(list(dict.fromkeys(Listresult)), 'Listresult')
      

if __name__ == '__main__':
   # Script entry point: run one fixed query, then report that the run finished.
   searcher = Gsearch_python("real estate developers in Pune")
   searcher.Gsearch()
   print("Completed")

No comments:

Post a Comment