# -*- coding: utf-8 -*-
import pandas as pd
from geocodequery import GeocodeQuery
# Load the input CSV into the module-level frame used by the rest of the
# script. low_memory=False turns off pandas' chunked parsing so that each
# column gets a single, consistently inferred dtype.
df = pd.read_csv(
    './birdsIwant3.csv',
    low_memory=False,
)
def addrs(location):
    """Geocode one location value and return its coordinates.

    A fresh ``GeocodeQuery("zh-tw", "tw")`` is created on every call, asked
    to geocode ``location``, and the location is echoed to stdout as a
    progress trace.

    Args:
        location: Value handed unchanged to ``GeocodeQuery.get_geocode``
            (presumably an address or place name -- confirm against the
            ``location`` column of the input CSV).

    Returns:
        pandas.Series with the keys ``"lat"`` and ``"lng"``, holding whatever
        ``get_lat()`` and ``get_lng()`` report after the query.
    """
    gq = GeocodeQuery("zh-tw", "tw")
    gq.get_geocode(location)
    # print() with a single argument behaves identically on Python 2 and 3,
    # unlike the Python 2-only ``print location`` statement.
    print(location)
    return pd.Series({"lat": gq.get_lat(), "lng": gq.get_lng()})
def test(location):
return pd.Series({"lat" :5, "lng":10})
# Start both coordinate columns as floats so that storing geocoded values in
# them never forces a dtype change; rows that are never queried stay at 0.0.
df['lat'] = 0.0
df['lng'] = 0.0

# Number of rows geocoded (one lookup per row) in each loop iteration.
query_count = 2
# Number of iterations needed to cover every row. Ceiling division keeps a
# final, partially filled chunk instead of silently dropping it.
loop_count = -(-df.shape[0] // query_count)

# Only the first two chunks are processed here as a demonstration; iterate
# over range(loop_count) instead to geocode the whole file.
for lc in range(min(2, loop_count)):
    # Pick the chunk's row labels with a positional, end-exclusive slice and
    # use the same labels on both sides of the assignment. Slicing with
    # df.loc[a:b] is end-inclusive, so it would cover one row more than the
    # right-hand side and leave that extra row as NaN.
    rows = df.index[lc * query_count:(lc + 1) * query_count]
    df.loc[rows, ['lat', 'lng']] = df.loc[rows, 'location'].apply(addrs)
    # Persist after every chunk; note that this rewrites the whole file.
    df.to_csv('./birdsIwant3_1.csv', index=False)

print(pd.read_csv('./birdsIwant3_1.csv', low_memory=False))
query_count 是每次循环查询的次数
loop_count 是计算总共需要跑几次循环
我跑了两个循环,下面的 print 结果只截取了含有 lat、lng 的部分:
count.317 birdName.317 lat lng
0 NaN NaN 24.373316 121.310400
1 NaN NaN 24.205938 121.010132
2 NaN NaN 24.373316 121.310400
3 NaN NaN 24.774906 120.970782
可以看到 lat、lng 是有存进去的。
因为档案很大,每次写回去都有点久
如果内存充足或者api不会被google挡掉的话,可以考虑看看全部查完再写回去
或者试试看每个循环拆成一个档案最后合并起来
不知道有没有解决你的问题,有问题再一起讨论~