Anonymous Anonymous - 1 month ago
170 0

No description

ActionScript

CategoricalPredictor

class CategoricalPredictor:
    """Feature predictor for a categorical feature.

    Attributes:
        p (dict): maps every feature value seen in training to a vector of
            smoothed probabilities P(value | class). Entry j of each vector
            belongs to ``classes[j]``.
        classes (list): distinct class labels seen in training, sorted in
            ascending order (so for labels 0,...,k-1 entry j is class j).
    """

    def __init__(self, x, y, alpha=1):
        """Estimate the per-class Categorical distribution of a feature.

        Inputs:
            x (array_like): feature values (categorical, hashable)
            y (array_like): class labels (0,...,k-1), aligned with x
            alpha (number): additive smoothing pseudo-count added to every
                (value, class) count; 0 disables smoothing
        """
        values = list(x)
        labels = list(y)

        # Sort the labels so that position j in every probability vector is
        # class j, independent of the order in which labels appear in y.
        self.classes = sorted(set(labels))
        class_index = {label: j for j, label in enumerate(self.classes)}
        num_classes = len(self.classes)

        # counts[value][j]: occurrences of `value` among samples of class j.
        counts = {value: np.zeros(num_classes) for value in set(values)}
        # class_totals[j]: number of samples of class j.
        class_totals = np.zeros(num_classes)
        for value, label in zip(values, labels):
            j = class_index[label]
            counts[value][j] += 1
            class_totals[j] += 1

        # Additive smoothing: alpha pseudo-observations for each of the
        # distinct feature values, hence num_values * alpha in the denominator.
        num_values = len(counts)
        denominator = class_totals + num_values * alpha
        self.p = {
            value: (value_counts + alpha) / denominator
            for value, value_counts in counts.items()
        }

    def partial_log_likelihood(self, x):
        """Log likelihood of feature values x according to each class.

        Inputs:
            x (array_like): vector of feature values
        Outputs:
            (array_like): matrix of log likelihoods for this feature with one
                row per element of x and one column per class; a zero
                probability yields -inf
        Raises:
            KeyError: if x contains a value that was not seen in training
        """
        rows = [self.p[value] for value in x]
        # log(0) is -inf by design here (an impossible value/class pair), so
        # silence numpy's divide-by-zero warning for that case.
        with np.errstate(divide="ignore"):
            return np.log(np.array(rows, dtype=float))