Python selenium
import pandas as pd
from selenium import webdriver
# Scrape the storm-event list items from the weather.gov OKX storm-events page
# and turn each one into a '2016-<tok1>-<tok2>-<tok3>' string stored in li_wea.
#
# NOTE(review): webdriver.PhantomJS and find_elements_by_css_selector are
# Selenium 3 APIs; as far as I know both are gone in Selenium 4 (use a headless
# Chrome/Firefox driver and find_elements(By.CSS_SELECTOR, ...) there) --
# confirm against the installed Selenium version before upgrading.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.weather.gov/okx/stormevents")
    date = driver.find_elements_by_css_selector(
        '#pagebody > div:nth-child(3) > div > table > tbody > tr > td ul:nth-child(6)'
    )
    if not date:
        # Fail with an explanation instead of a bare "list index out of range".
        raise IndexError(
            "storm-event list not found: the CSS selector matched no element"
        )
    lis = date[0].find_elements_by_css_selector('li')
    # Prefix every entry with the year, split on single spaces and join the
    # first four tokens with '-'.  Slicing (rather than indexing [0]..[3])
    # keeps short entries in the list instead of raising, so list positions
    # still line up with the page order.
    li_wea = ["-".join(('2016 ' + li.text).split(" ")[:4]) for li in lis]
finally:
    # Element text is read above while the session is alive; always shut the
    # browser process down afterwards, even if scraping failed.
    driver.quit()
# Build a one-column DataFrame of storm-event dates from the strings in li_wea.
new1 = pd.DataFrame(li_wea, columns=['old'])

# Capture only 'YYYY-xxx-DD' (three-character month, two-digit day); every
# other entry becomes NaN and is either corrected below or dropped.
new1['date'] = new1['old'].str.extract(r'(\d\d\d\d-...-\d\d)', expand=False)

# Hand-entered dates for rows the pattern cannot capture.  Keys are row
# positions in li_wea, so they are tied to the order of the scraped list.
# Months are written as three-letter abbreviations so that every value in the
# column shares the single '%Y-%b-%d' format used for parsing below.
manual_dates = {
    4: '2016-Feb-05',
    5: '2016-Feb-08',
    11: '2016-Apr-03',
    12: '2016-Apr-04',
    14: '2016-Jun-28',
    15: '2016-Jul-18',
    16: '2016-Jul-29',
    17: '2016-Jul-31',
    25: '2016-Oct-08',
    35: '2016-Dec-05',
}
if max(manual_dates) >= len(new1):
    # .loc would silently append new rows for positions that do not exist.
    raise IndexError(
        "manual date corrections refer to rows beyond the scraped list "
        "(%d rows scraped)" % len(new1)
    )
for row, value in manual_dates.items():
    # .loc writes into new1 itself; new1['date'][row] = ... is chained
    # assignment and may only modify a temporary copy.
    new1.loc[row, 'date'] = value

new1 = new1.drop('old', axis=1)

# Two extra dates added by hand, appended after the scraped rows.
new2 = pd.DataFrame(['2016-Aug-01', '2016-Dec-01'], columns=['date'])
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
new1 = pd.concat([new1, new2], ignore_index=True)

# Parse with an explicit format (missing values become NaT), then drop the
# rows that never received a date.  Original row labels are kept, so dropped
# rows leave gaps in the index.
new1['date'] = pd.to_datetime(new1['date'], format='%Y-%b-%d')
new1 = new1.dropna()
new1
| | date |
|---|---|
| 0 | 2016-01-10 |
| 1 | 2016-01-13 |
| 2 | 2016-01-17 |
| 3 | 2016-01-23 |
| 4 | 2016-02-05 |
| 5 | 2016-02-08 |
| 6 | 2016-02-15 |
| 7 | 2016-02-24 |
| 8 | 2016-03-14 |
| 9 | 2016-03-21 |
| 10 | 2016-03-28 |
| 11 | 2016-04-03 |
| 12 | 2016-04-04 |
| 13 | 2016-05-30 |
| 14 | 2016-06-28 |
| 15 | 2016-07-18 |
| 16 | 2016-07-29 |
| 17 | 2016-07-31 |
| 18 | 2016-08-10 |
| 19 | 2016-08-11 |
| 20 | 2016-08-12 |
| 21 | 2016-08-13 |
| 22 | 2016-08-20 |
| 24 | 2016-09-19 |
| 25 | 2016-10-08 |
| 26 | 2016-10-22 |
| 27 | 2016-10-22 |
| 28 | 2016-10-27 |
| 29 | 2016-10-30 |
| 30 | 2016-11-11 |
| 31 | 2016-11-14 |
| 32 | 2016-11-20 |
| 33 | 2016-11-29 |
| 34 | 2016-11-30 |
| 35 | 2016-12-05 |
| 36 | 2016-12-15 |
| 37 | 2016-12-17 |
| 38 | 2016-12-18 |
| 39 | 2016-08-01 |
| 40 | 2016-12-01 |
Reference