Table Scraper Library

1 minute read

This is a small module that helps you extract data from websites in the form of a table.

1. Start by installing the dependencies listed in requirements.txt

  pip install  -r requirements.txt

beautifulsoup4 tabulate requests

2. Run the main.py script to collect the data table you want.

py main.py

# main.py
from EzScrap import Es


def main():
    """Prompt for a URL, then interactively browse the tables found on it."""
    # Strip stray whitespace so a padded or blank answer is handled cleanly.
    url = input('Enter Url : ').strip()
    scraper = Es(url)
    # Es reports a missing URL itself; without a URL there is nothing to
    # browse, so do not start the interactive table loop.
    if url:
        scraper.pross()


# Only run the prompt when executed as a script, not when imported.
if __name__ == "__main__":
    main()

3. The scraping module

# EzScrap.py
import requests as req 
from bs4 import BeautifulSoup
from tabulate import tabulate
import csv
class Es:
    """Fetch a web page and display or export the HTML tables it contains.

    Creating an instance with a non-empty URL downloads and parses the page
    immediately. ``pross`` then walks the tables interactively and
    ``table_to_csv`` writes one table to ``data<N>.csv``.
    """

    # Seconds to wait for the server before giving up on the request.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """Store *url* and, when it is non-empty, fetch and parse the page."""
        self.url = url
        # Safe defaults: an instance created without a URL has no tables, so
        # pross() becomes a no-op instead of failing on missing attributes.
        self.doc = None
        self.table_len = 0
        if not self.url:
            print('Enter A url !!')
            return
        self.parse_table()

    def parse_table(self):
        """Download ``self.url`` and parse its markup into ``self.doc``.

        Sets ``self.res`` (the HTTP response), ``self.body`` (the markup that
        is parsed), ``self.doc`` (the parsed document) and ``self.table_len``
        (the number of ``<table>`` elements found).
        """
        # A timeout keeps an unresponsive server from blocking forever.
        self.res = req.get(self.url, timeout=self.REQUEST_TIMEOUT)
        html = self.res.text

        if '<body>' in html:
            # Keep only the markup between a literal <body> and </body>.
            self.body = html.split('<body>')[1].split('</body>')[0]
        else:
            self.body = html

        self.doc = BeautifulSoup(self.body, "html.parser")
        self.table_len = len(self.doc.find_all('table'))

    @staticmethod
    def _normalize_cell(text):
        """Remove thousands separators from *text* when it is a grouped number.

        ``"1,234,567.89"`` becomes ``"1234567.89"``. Any other text containing
        commas (names, sentences, decimal commas such as ``"1,5"``) is
        returned unchanged; the csv writer quotes it correctly.
        """
        if ',' not in text:
            return text
        whole, _, fraction = text.lstrip('+-').partition('.')
        groups = whole.split(',')
        is_grouped_number = (
            groups[0].isdigit()
            and len(groups[0]) <= 3
            and all(g.isdigit() and len(g) == 3 for g in groups[1:])
            and (not fraction or fraction.isdigit())
        )
        return text.replace(',', '') if is_grouped_number else text

    @classmethod
    def _row_cells(cls, row, empty_td='', normalize=False):
        """Return the text of each cell of a ``<tr>`` as a list of strings.

        One list item is produced per ``<td>``/``<th>`` so multi-word cells
        stay in a single column and empty cells keep their position.

        row        -- the ``<tr>`` element to read.
        empty_td   -- placeholder used for ``<td>`` cells without any text.
        normalize  -- when true, pass every value through ``_normalize_cell``.
        """
        # Direct children only, so cells of a nested table are not repeated.
        cells = row.find_all(['td', 'th'], recursive=False)
        if cells:
            values = []
            for cell in cells:
                # Collapse runs of whitespace/newlines inside the cell.
                text = ' '.join(cell.get_text().split())
                if not text and cell.name == 'td':
                    text = empty_td
                values.append(text)
        else:
            # Row without cell tags: fall back to whitespace-separated tokens.
            values = row.text.split()
        if normalize:
            values = [cls._normalize_cell(value) for value in values]
        return values

    def pross(self):
        """Print each table in turn and ask whether to save it as CSV.

        After each table the user may press C to save it, Q to stop, or
        Enter to move on to the next table.
        """
        tables = self.doc.find_all('table') if self.doc is not None else []
        for r, table in enumerate(tables):
            self.table = table
            self.rows = table.find_all('tr')
            if not self.rows:
                # A table without rows has no header to show; skip it.
                continue

            # Slice off the header by position. Comparing rows with == would
            # also drop every later row whose content equals the header.
            header = self._row_cells(self.rows[0])
            self.col = [self._row_cells(row) for row in self.rows[1:]]
            print(tabulate(self.col, headers=header))
            print('=============================================\n')
            ch = input(
                f'Table ({r}) | Press C to save as csv file | '
                'Press Enter to continue | Press Q To Quit : '
            ).lower()
            if ch == "q":
                break
            if ch == "c":
                self.table_to_csv(r)

    def table_to_csv(self, t_number):
        """Write table number *t_number* of the page to ``data<t_number>.csv``.

        Empty ``<td>`` cells are written as ``NaN`` and thousands separators
        are removed from numbers. Raises ``IndexError`` when the page has no
        table with that index.
        """
        self.data = []
        self.parsed_data = []
        self.table = self.doc.find_all('table')[t_number]
        rows = self.table.find_all('tr')
        # Kept as a row count (int) here, as earlier versions exposed it.
        self.rows = len(rows)

        for r, row in enumerate(rows):
            print('(', r, '/', self.rows, ')')
            self.parsed_data.append(
                self._row_cells(row, empty_td='NaN', normalize=True)
            )

        filename = f'data{t_number}.csv'
        # Explicit UTF-8 so non-ASCII cell text does not depend on the
        # platform's default encoding.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(self.parsed_data)
        print('Data Saved in ' + filename + ' file')

Credits : https://github.com/issamoxix/EzScraper

Share on

Twitter Facebook LinkedIn

Ichrak LAFRAM

Table Scraper Library

1. Start by installing the dependencies listed in requirements.txt

2. Run the main.py script to collect the data table you want.

3. The scraping module

Share on

You may also enjoy

Webdesign90dayschallenge

Linkedin

Validate Data With Pydantic

Become A Good Data Scientist