Python selenium
import pandas as pd
from selenium import webdriver
# Scrape the storm-event list from the page below and build `new1`, a
# one-column DataFrame ('date') of datetime values for the listed events.
#
# NOTE(review): webdriver.PhantomJS and find_elements_by_css_selector are
# Selenium 3 APIs and, as far as I know, are gone in Selenium 4 -- confirm the
# pinned Selenium version before upgrading.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.weather.gov/okx/stormevents")
    date = driver.find_elements_by_css_selector(
        '#pagebody > div:nth-child(3) > div > table > tbody > tr > td ul:nth-child(6)'
    )
    if not date:
        # Fail with an explicit message instead of a bare "list index out of
        # range" when the selector matches nothing.
        raise IndexError(
            "storm-event list not found; the page layout may have changed"
        )
    lis = date[0].find_elements_by_css_selector('li')
    # Read the text while the browser session is still alive. Each entry is
    # prefixed with the year and its first four space-separated tokens are
    # joined with '-', e.g. 'Jan 10 - ...' -> '2016-Jan-10--'.
    li_wea = ['-'.join(('2016 ' + i.text).split(" ")[:4]) for i in lis]
finally:
    # Always shut the browser down so the driver process is not leaked.
    driver.quit()

new1 = pd.DataFrame(li_wea, columns=['old'])
# Keep only entries shaped like YYYY-xxx-DD; everything else becomes NaN.
new1['date'] = new1['old'].str.extract(r'(\d\d\d\d-...-\d\d)', expand=False)

# Hand-entered dates for rows the pattern above could not parse. The row
# numbers are positions in the scraped list, so they are tied to the page
# content at the time this was written. Months use three-letter abbreviations
# so that every value shares the single '%Y-%b-%d' format parsed below.
manual_dates = {
    4: '2016-Feb-05',
    5: '2016-Feb-08',
    11: '2016-Apr-03',
    12: '2016-Apr-04',
    14: '2016-Jun-28',
    15: '2016-Jul-18',
    16: '2016-Jul-29',
    17: '2016-Jul-31',
    25: '2016-Oct-08',
    35: '2016-Dec-05',
}
for row, value in manual_dates.items():
    # .loc writes into the frame itself; chained new1['date'][row] = ...
    # may write to a temporary copy and be silently lost.
    new1.loc[row, 'date'] = value

new1 = new1.drop('old', axis=1)
# Two extra dates added by hand after the scraped entries.
new2 = pd.DataFrame(['2016-Aug-01', '2016-Dec-01'], columns=['date'])
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
new1 = pd.concat([new1, new2], ignore_index=True)
# An explicit format avoids per-element format guessing; NaN becomes NaT and
# is dropped on the next line, leaving gaps in the index for unparsed rows.
new1['date'] = pd.to_datetime(new1['date'], format='%Y-%b-%d')
new1 = new1.dropna()
new1
 | date |
---|---|
0 | 2016-01-10 |
1 | 2016-01-13 |
2 | 2016-01-17 |
3 | 2016-01-23 |
4 | 2016-02-05 |
5 | 2016-02-08 |
6 | 2016-02-15 |
7 | 2016-02-24 |
8 | 2016-03-14 |
9 | 2016-03-21 |
10 | 2016-03-28 |
11 | 2016-04-03 |
12 | 2016-04-04 |
13 | 2016-05-30 |
14 | 2016-06-28 |
15 | 2016-07-18 |
16 | 2016-07-29 |
17 | 2016-07-31 |
18 | 2016-08-10 |
19 | 2016-08-11 |
20 | 2016-08-12 |
21 | 2016-08-13 |
22 | 2016-08-20 |
24 | 2016-09-19 |
25 | 2016-10-08 |
26 | 2016-10-22 |
27 | 2016-10-22 |
28 | 2016-10-27 |
29 | 2016-10-30 |
30 | 2016-11-11 |
31 | 2016-11-14 |
32 | 2016-11-20 |
33 | 2016-11-29 |
34 | 2016-11-30 |
35 | 2016-12-05 |
36 | 2016-12-15 |
37 | 2016-12-17 |
38 | 2016-12-18 |
39 | 2016-08-01 |
40 | 2016-12-01 |
Reference