Anonymous Anonymous - 1 month ago
132 0

No description

Python

create features

# Constant column: every row gets the value 1.0.
# NOTE(review): this assignment appears before the `vdf = vdf.copy()` further
# down, so it writes into the frame as passed in, not the copy - confirm intended.
vdf['bias'] = 1.0

    def fsin(f, n):
        """Return the sine of ``f`` placed on a cycle of length ``n``.

        Computes ``sin(2*pi * f / n)`` so that values ``f`` and ``f + n``
        produce the same result.

        Args:
            f: Position within the cycle, or None for a missing value.
            n: Length of the cycle; converted with ``float()`` before dividing.

        Returns:
            0 when ``f`` is None, otherwise the sine as a float.
        """
        # math.sin never returns None, so a None check on its result can never
        # fire; the missing-value guard has to test the input instead.
        if f is None:
            return 0
        return math.sin(2.0 * math.pi * f / float(n))
    def fcos(f, n):
        """Return the cosine of ``f`` placed on a cycle of length ``n``.

        Computes ``cos(2*pi * f / n)`` so that values ``f`` and ``f + n``
        produce the same result.

        Args:
            f: Position within the cycle, or None for a missing value.
            n: Length of the cycle; converted with ``float()`` before dividing.

        Returns:
            0 when ``f`` is None, otherwise the cosine as a float.
        """
        # math.cos never returns None, so a None check on its result can never
        # fire; the missing-value guard has to test the input instead.
        if f is None:
            return 0
        return math.cos(2.0 * math.pi * f / float(n))
    
    # Rebind the local name to a copy; every column write and delete below
    # operates on that copy.
    # NOTE(review): presumably vdf is a pandas DataFrame - confirm with caller.
    vdf = vdf.copy()
    
    # Day of week as a sin/cos pair with period 7, taken from each
    # timestamp's weekday().
    vdf['sin_day_of_week'] = vdf.tmstmp.apply(lambda x: fsin(x.weekday(), 7))
    vdf['cos_day_of_week'] = vdf.tmstmp.apply(lambda x: fcos(x.weekday(), 7))
    

    
    # Hour of day as a sin/cos pair with period 24.
    vdf['sin_hour_of_day'] = vdf.tmstmp.apply(lambda x: fsin(x.hour, 24))
    vdf['cos_hour_of_day'] = vdf.tmstmp.apply(lambda x: fcos(x.hour, 24))
    
    # Minute-resolution time of day (hour * 60 + minute) as a sin/cos pair
    # with period 60 * 24.
    vdf['sin_time_of_day'] = vdf.tmstmp.apply(lambda x: fsin(x.hour * 60 + x.minute, 60*24))
    vdf['cos_time_of_day'] = vdf.tmstmp.apply(lambda x: fcos(x.hour * 60 + x.minute, 60*24))
    
    # 1.0 when weekday() < 5, otherwise 0.0.
    # NOTE(review): with the datetime convention (Monday == 0) this marks
    # Monday through Friday - confirm the timestamp type.
    vdf['weekday'] = vdf.tmstmp.apply(lambda x: float((x.weekday() < 5)))
    
    
    
    # hdg as a sin/cos pair with period 360.
    # NOTE(review): presumably a heading in degrees - confirm units.
    vdf['sin_hdg'] = vdf.hdg.apply(lambda x : fsin(x,360))
    vdf['cos_hdg'] = vdf.hdg.apply(lambda x : fcos(x,360))
    
    
    # Indicator columns for the rt value: integer 1 on an exact string match,
    # otherwise 0.
    vdf['61A'] = vdf.rt.apply(lambda x : int(x == "61A"))
    vdf['61B'] = vdf.rt.apply(lambda x : int(x == "61B"))
    vdf['61C'] = vdf.rt.apply(lambda x : int(x == "61C"))
    vdf['61D'] = vdf.rt.apply(lambda x : int(x == "61D"))
    
    # Indicator columns for the des value: 1.0 on an exact string match,
    # otherwise 0.0 (floats here, unlike the integer rt indicators above).
    # The trailing space in "Braddock " and "McKeesport " is part of the
    # literal being compared and of the new column name.
    vdf['Braddock '] = vdf.des.apply(lambda x: float(x == "Braddock "))
    vdf['Downtown'] = vdf.des.apply(lambda x: float(x == "Downtown"))
    vdf['Greenfield Only'] = vdf.des.apply(lambda x: float(x == "Greenfield Only"))
    vdf['McKeesport '] = vdf.des.apply(lambda x: float(x == "McKeesport "))
    vdf['Murray-Waterfront'] = vdf.des.apply(lambda x: float(x == "Murray-Waterfront"))
    vdf['Swissvale'] = vdf.des.apply(lambda x: float(x == "Swissvale"))
    
    # Drop the source columns that were encoded above.
    del vdf['hdg']
    del vdf['tmstmp']
    del vdf['des']
    del vdf['rt']
    
    # Drop four more columns that are not used for any feature in this section.
    # NOTE(review): these look like identifier columns - confirm.
    del vdf['vid']
    del vdf['pid']
    del vdf['tatripid']
    del vdf['tablockid']
    # Return the copy with the added feature columns and without the dropped ones.
    return vdf