Python requests
import urllib.request

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the 2016 US holiday listing and collect the holiday dates into the
# single-column DataFrame `holi` (column "date", dtype datetime64).

# Not used by the scraping below; kept so any later code that expects `df`
# still finds it.
df = pd.DataFrame(columns=["rank", "keyword"])

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(
    "https://www.officeholidays.com/countries/usa/2016.php", timeout=30
)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
bs = BeautifulSoup(response.content, "html.parser")

# Flatten every table cell on the page, then keep every 5th cell starting at
# index 1; the script treats those cells as the date column.
# NOTE(review): this relies on the table having 5 cells per row -- confirm
# against the live page layout.
trs = bs.select("table td")
trs1 = trs[1::5]

li = []
holi = pd.DataFrame()
count = 0
# Only the first 14 date cells are used.
for i in trs1[0:14]:
    # Prefix the year and join the first three space-separated tokens with
    # "-" (presumably "<year>-<month>-<day>") so pd.to_datetime can parse it.
    parts = ("2016 " + i.text.strip()).split(" ")
    li.append(parts[0] + "-" + parts[1] + "-" + parts[2])
    count += 1

holi["date"] = li
holi["date"] = pd.to_datetime(holi["date"])
# Bare expression: displays the frame when run as a notebook cell.
holi
| | date |
|---|---|
0 | 2016-01-01 |
1 | 2016-01-18 |
2 | 2016-02-15 |
3 | 2016-04-15 |
4 | 2016-05-08 |
5 | 2016-05-30 |
6 | 2016-06-19 |
7 | 2016-07-04 |
8 | 2016-09-05 |
9 | 2016-10-10 |
10 | 2016-11-11 |
11 | 2016-11-24 |
12 | 2016-11-25 |
13 | 2016-12-26 |
Reference