Python selenium

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape the storm-event list from the NWS OKX page and turn it into a
# one-column DataFrame (`new1`) of datetimes.
#
# NOTE(review): webdriver.PhantomJS was deprecated in Selenium 3.x and removed
# in Selenium 4. It is kept here so the script's runtime requirements do not
# change silently; switch to a headless Chrome/Firefox driver when upgrading.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.weather.gov/okx/stormevents")
    # find_elements(By.CSS_SELECTOR, ...) works on Selenium 3 and 4, whereas
    # the find_elements_by_* helpers were removed in Selenium 4.3.
    date = driver.find_elements(
        By.CSS_SELECTOR,
        '#pagebody > div:nth-child(3) > div > table > tbody > tr > td ul:nth-child(6)',
    )
    # Presumably this <ul> is the 2016 list, given the hard-coded '2016 '
    # prefix below -- verify against the live page layout.
    lis = date[0].find_elements(By.CSS_SELECTOR, 'li')
    # Read the text while the session is alive; the elements are unusable
    # once the driver has quit.
    li_text = [li.text for li in lis]
finally:
    # Always shut the browser down so no orphaned process is left behind.
    driver.quit()

# Prefix the year and join the first four space-separated tokens with '-',
# e.g. "Jan 10 - Heavy Rain" -> "2016-Jan-10--". Slicing (instead of indexing
# tokens 0..3) means an unexpectedly short item no longer raises IndexError;
# it simply fails the regex below and is removed by dropna().
li_wea = ["-".join(('2016 ' + text).split(" ")[:4]) for text in li_text]

new1 = pd.DataFrame(li_wea, columns=['old'])
# Only "YYYY-xxx-DD" (3-character month, 2-digit day) matches; everything
# else becomes NaN. expand=False returns a Series for the single group.
new1['date'] = new1['old'].str.extract(r'(\d\d\d\d-...-\d\d)', expand=False)

# Hand-entered dates for rows the regex cannot parse, keyed by row position
# in the scraped list. These positions are tied to the page content at the
# time of writing. Months are written as three-letter abbreviations so every
# value shares the single '%Y-%b-%d' format used below.
manual_dates = {
    4: '2016-Feb-05',
    5: '2016-Feb-08',
    11: '2016-Apr-03',
    12: '2016-Apr-04',
    14: '2016-Jun-28',
    15: '2016-Jul-18',
    16: '2016-Jul-29',
    17: '2016-Jul-31',
    25: '2016-Oct-08',
    35: '2016-Dec-05',
}
for row_label, fixed_date in manual_dates.items():
    # .loc writes into the frame itself; chained new1['date'][i] = ... may
    # write to a temporary copy and is a no-op under pandas Copy-on-Write.
    new1.loc[row_label, 'date'] = fixed_date

new1 = new1.drop('old', axis=1)
# Two extra dates appended by hand after the scraped ones.
new2 = pd.DataFrame(['2016-Aug-01', '2016-Dec-01'], columns=['date'])
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# dropna() discards rows that neither matched the regex nor were patched.
new1 = pd.concat([new1, new2], ignore_index=True).dropna()
new1['date'] = pd.to_datetime(new1['date'], format='%Y-%b-%d')
new1
         date
0  2016-01-10
1  2016-01-13
2  2016-01-17
3  2016-01-23
4  2016-02-05
5  2016-02-08
6  2016-02-15
7  2016-02-24
8  2016-03-14
9  2016-03-21
10 2016-03-28
11 2016-04-03
12 2016-04-04
13 2016-05-30
14 2016-06-28
15 2016-07-18
16 2016-07-29
17 2016-07-31
18 2016-08-10
19 2016-08-11
20 2016-08-12
21 2016-08-13
22 2016-08-20
24 2016-09-19
25 2016-10-08
26 2016-10-22
27 2016-10-22
28 2016-10-27
29 2016-10-30
30 2016-11-11
31 2016-11-14
32 2016-11-20
33 2016-11-29
34 2016-11-30
35 2016-12-05
36 2016-12-15
37 2016-12-17
38 2016-12-18
39 2016-08-01
40 2016-12-01

Reference


© 2018. All rights reserved.

Powered by Hydejack v7.5.0