Python requests

import urllib.request

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the US public-holiday dates for one year from officeholidays.com and
# collect them in `holi`, a DataFrame with a single datetime column "date".
YEAR = 2016
URL = "https://www.officeholidays.com/countries/usa/{}.php".format(YEAR)

# `select("table td")` returns every cell as one flat list, so the date column
# is picked out by stride.
# NOTE(review): assumes 5 cells per table row with the date in the second
# cell, and that only the first 14 entries are wanted -- verify against the
# page markup if the site layout changes.
CELLS_PER_ROW = 5
DATE_CELL = 1
MAX_HOLIDAYS = 14

df = pd.DataFrame(columns=["rank", "keyword"])  # not used below; kept from the original snippet

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(URL, timeout=10)
response.raise_for_status()

bs = BeautifulSoup(response.content, "html.parser")
trs = bs.select("table td")
trs1 = trs[DATE_CELL::CELLS_PER_ROW]

# Build "YEAR-month-day" strings from each date cell.
# Presumably the cell text is two tokens, month then day (e.g. "January 01");
# splitting on any whitespace avoids empty tokens from repeated spaces, and the
# two-name unpacking fails loudly if a cell has fewer than two tokens.
li = []
for cell in trs1[:MAX_HOLIDAYS]:
    month, day = cell.text.split()[:2]
    li.append("{}-{}-{}".format(YEAR, month, day))

holi = pd.DataFrame({'date': pd.to_datetime(li)})
holi
         date
0  2016-01-01
1  2016-01-18
2  2016-02-15
3  2016-04-15
4  2016-05-08
5  2016-05-30
6  2016-06-19
7  2016-07-04
8  2016-09-05
9  2016-10-10
10 2016-11-11
11 2016-11-24
12 2016-11-25
13 2016-12-26

Reference


© 2018. All rights reserved.

Powered by Hydejack v7.5.0