Scraping WSJ Market Data

Python is an excellent tool for scraping web pages.

Scraping a web page


import os, sys
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup as bs

def ParesIndex(sHtml):
    """Print one index row as ``title value delta percentage``.

    ``sHtml`` is a table-row element. Its ``td`` cells are collected; the
    text of the link inside the first cell is the title, and the ``span``
    text of the next three cells supplies the remaining fields. The four
    pieces are printed on one line separated by single spaces.
    """
    cells = sHtml.find_all('td')

    # The title lives in an anchor somewhere below the first cell.
    link = cells[0].find('a', recursive=True)
    fields = [link.text]

    # Cells 1-3 hold the value, the change and the percentage change.
    fields.extend(cells[position].span.text for position in (1, 2, 3))

    print(' '.join(fields))


def scrapping(url, timeout=10):
    """Download a market-data page and print its first three index rows.

    The page is fetched with a browser-like ``User-Agent`` header, parsed
    with BeautifulSoup's ``html.parser``, and the ``tr`` rows marked
    ``data-type="Index"`` inside the first ``table.cr_stockindexes_table``
    are passed, one by one, to ``ParesIndex`` for printing.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Raises:
        requests.RequestException: On a connection failure, a timeout, or
            an HTTP error status (via ``raise_for_status``).
        IndexError: If the index table is absent or has fewer than three
            index rows.
    """
    # Using the session as a context manager releases its pooled
    # connections even when the request fails.
    with requests.Session() as session:
        # NOTE(review): the User-Agent value lacks a closing parenthesis;
        # it is kept unchanged here - confirm whether that is intended.
        session.headers.update({'User-Agent': "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322"})

        response = session.get(url, timeout=timeout)
        # Fail with the real HTTP error instead of an unrelated IndexError
        # further down when the server returns an error page.
        response.raise_for_status()
        html = response.text

    soup = bs(html, features="html.parser")

    # CSS selector for the table that holds the stock indexes.
    tables = soup.select('table.cr_stockindexes_table')
    if not tables:
        raise IndexError("no 'table.cr_stockindexes_table' element found in " + str(url))

    rows = tables[0].find_all('tr', {'data-type': 'Index'})
    if len(rows) < 3:
        raise IndexError("expected at least 3 index rows, found " + str(len(rows)))

    # Rows are expected in this order: Dow, Nasdaq, S&P 500.
    for row in rows[:3]:
        ParesIndex(row)

    
# Page whose stock-index table is scraped when this file is executed.
url = 'https://www.wsj.com/market-data/quotes/index/US/DJIA'

# Only perform the network request when run directly (script or notebook),
# so importing this module does not trigger a download.
if __name__ == "__main__":
    scrapping(url)