/***************************************
 * copyright (c) Vanden Berghen Frank  *
 * V 1.2                               *
 ***************************************/

#include <memory.h>
#include <malloc.h>
#include <stdlib.h>
#include <math.h>
#include "tools.h"
#include "RRBAGFSC45.h"

//#include <crtdbg.h>

// Defining __NO_DATASET__ implies __EVAL_ONLY__. In this file __EVAL_ONLY__
// compiles out every block wrapped in "#ifndef __EVAL_ONLY__": the
// constructors that take a DataSet and the save() methods.
#ifdef __NO_DATASET__
#define __EVAL_ONLY__
#endif

// Loads a boosted ensemble from `stream`.
//
// Reads two text lines (the class count nClasses, then the tree count B) and
// then lets each of the B C45 trees deserialize itself from the same stream.
// The title line that save() writes first is not read here; presumably the
// caller consumes it before constructing the object.
//
// Terminates the process when the header is truncated or memory runs out.
BoostedC45 ::BoostedC45 (FILE *stream)
{
    int j;
    char buffer[300],*tline=buffer;

    // fgets() returns NULL on end-of-file or read error. Parsing the buffer
    // in that case would turn stale or uninitialized bytes into a count.
    if (fgets(tline,300,stream)==NULL)
    {
        fprintf(stderr,"Boosted C45: unexpected end of file while reading the number of classes.\n");
        exit(EXIT_FAILURE);
    }
    nClasses=atol(tline);

    if (fgets(tline,300,stream)==NULL)
    {
        fprintf(stderr,"Boosted C45: unexpected end of file while reading the number of trees.\n");
        exit(EXIT_FAILURE);
    }
    B=atol(tline);

    TreeTable=(C45**)malloc(B*sizeof(C45*));
    if (TreeTable==NULL)
    {
        fprintf(stderr,"Boosted C45: out of memory.\n");
        exit(210);
    }

    // MinClass ends up as the smallest MinClass reported by any tree; 255 is
    // the starting upper bound (assumed to exceed every real class number).
    MinClass=255;
    for (j=0; j<B; j++)
    {
        TreeTable[j]=new C45(stream,nClasses);
        MinClass=MIN(TreeTable[j]->MinClass,MinClass);
    }

    // One vote accumulator per class number in [MinClass, nClasses).
    vote=(double*)malloc((nClasses-MinClass)*sizeof(double));
    if (vote==NULL)
    {
        fprintf(stderr,"boosted C45: out of memory.\n");
        exit(210);
    }
}

// Releases what the ensemble owns: the name string (when set), the vote
// buffer, every tree in the table and finally the table itself.
BoostedC45 ::~BoostedC45 ()
{
    if (name!=NULL)
    {
        free(name);
    }
    free(vote);

    int idx=0;
    while (idx<B)
    {
        delete TreeTable[idx];
        ++idx;
    }
    free(TreeTable);
}

// Class label of training case `Case`: the ClassNo stored in the slot at
// index nAtt of that case's row. The expansion refers to locals named `d`
// (the table of rows) and `nAtt`, which must be in scope where it is used.
#define RealClass(Case) (*((ClassNo*)(d[Case]+nAtt)))

#ifndef __EVAL_ONLY__

// Trains a boosted ensemble of C45 trees on D.
//
//   D  - training set. D->iw must be at least 1 (the message refers to the
//        load option WI=1), otherwise the process exits with code 198. The
//        per-case weight is kept in D->Item[i][D->MaxAtt+1] and is
//        overwritten by this constructor.
//   _B - maximum number of trees; 0 selects the default of 10.
//
// Boosting stops early when a tree's weighted error exceeds 0.5 (that tree
// is discarded) or when a tree has no weighted error at all (that tree is
// kept). On return B holds the number of trees actually kept.
BoostedC45::BoostedC45(DataSet *D, int _B): nClasses(D->MaxClass+D->MinClass), 
        MinClass(D->MinClass), B(_B)
{
    if (D->iw<1)
    {
        printf("You must load the dataset in memory with the option WI=1.\n");
        exit(198);
    }
    if (_B==0) { _B=10;}
    double **d=D->Item, error, sum;
    int i=D->nItem, nAtt2=D->MaxAtt+1, nAtt=D->MaxAtt, j, k=1;
    C45 *c1;

    // Every case starts with the same weight.
    while (i--) d[i][nAtt2]=1.0;

    // Mask handed to every C45 constructor, all entries set to 1
    // (presumably "use every attribute" -- confirm against C45).
    Boolean *FS=(Boolean*)malloc(nAtt);
    memset(FS,1,nAtt);

    // Size the table with the effective tree count _B. The member B was
    // initialized from the caller's argument and is still 0 when the default
    // was requested, which would leave no room for the trees stored below.
    TreeTable=(C45**)malloc(_B*sizeof(C45*));
    vote=(double*)malloc((nClasses-MinClass)*sizeof(double));
    printf("Generating Boosted Trees\n"); 
    for (j=0; j<_B; j++)
    {
        TreeTable[j]=c1=new C45(D, FS, 1, 1);

        // Weighted error of the new tree on its own.
        i=D->nItem; sum=0.0; error=0.0;
        while (i--)
        {
            sum+=d[i][nAtt2];
            if (c1->eval(d[i],NULL)!=RealClass(i)) error+=d[i][nAtt2];
        }

        // Unweighted error count of the ensemble built so far. eval() votes
        // over the first B trees, hence the temporary B=j+1. The count is no
        // longer refreshed once the ensemble has reached zero errors.
        if (k!=0)
        {
            i=D->nItem; k=0; B=j+1;
            while (i--) if (eval(d[i],NULL)!=RealClass(i)) k++;
        }
        printf("Error of tree[%i] = %f %% (%f cases)\n"
               "Error of tree[1..%i] = %f %% (%i cases)\n\n", 
                j+1, error/sum, error, j+1, ((double)k)/D->nItem, k); fflush(stdout);
        error=error/sum;
        if (error>0.5) { delete c1; break; }
        if (error<=0.0)
        {
            // A tree without weighted error would scale every weight to 0 and
            // make the renormalization below divide by zero, leaving NaN
            // weights for all later trees. Keep this tree and stop boosting.
            j++;
            break;
        }

        // Shrink the weight of correctly classified cases ...
        error=error*(1-error);
        i=D->nItem; sum=0;
        while (i--)
        {
            if (c1->eval(d[i],NULL)==RealClass(i)) d[i][nAtt2]*=error;
            sum+=d[i][nAtt2];
        }
        // ... and rescale so that the weights add up to the number of cases.
        i=D->nItem; sum=((double)D->nItem)/sum;    
        while (i--) d[i][nAtt2]*=sum;
    }
    B=j;
    free(FS);
}

// Serializes the ensemble to f: a title line, nClasses, the tree count B and
// then every tree through its own save(). Prints one '.' per tree and a
// final newline on stdout as a progress indicator.
void BoostedC45::save(FILE *f)
{
    fprintf(f,"Boosted Trees v1.00 (c) Vanden Berghen Frank\n%i\n%i\n",nClasses,B);

    int idx=0;
    while (idx<B)
    {
        printf(".");
        TreeTable[idx]->save(f);
        ++idx;
    }

    printf("\n");
}

#endif

// Classifies `features` by confidence-weighted voting over the B trees.
//
// Every tree adds the confidence it reports to the accumulator of the class
// it predicts; the class with the largest total wins, and on equal totals
// the highest class number wins.
//
//   features   - attribute vector handed unchanged to each tree.
//   confidence - optional output (may be NULL): winning total divided by B.
//
// Returns the winning class number (accumulator index + MinClass).
ClassNo BoostedC45 ::eval(double *features,double *confidence)
{
    ClassNo best=0, c;
    double conf;
    // The running maximum is kept in double, the type of the accumulators,
    // so fractional confidence sums are compared and reported unnarrowed.
    double top;
    int i;

    memset(vote,0,(nClasses-MinClass)*sizeof(double));

    // Unweighted alternative: vote[tree->eval(features, NULL)-MinClass]++
    for (i=0; i<B; i++) 
    {
        c=(TreeTable[i])->eval(features, &conf)-MinClass;
        vote[c]+=conf;
    }

    top=vote[0];
    for (i=1; i<(nClasses-MinClass); i++) if (vote[i]>=top) { best=i; top=vote[i]; }

    // Average confidence that the trees gave to the winning class.
    if (confidence) *confidence=top/B;
    return best+MinClass;   // the class computed for this case
}

// Releases the name string (when set), the vote buffer, each pairwise
// classifier and then the classifier table.
RRBAGFSC45::~RRBAGFSC45()
{
    if (name!=NULL)
    {
        free(name);
    }
    free(vote);

    int idx=0;
    while (idx<nClassifier)
    {
        delete rr[idx];
        ++idx;
    }
    free(rr);
}

// Loss charged for a signed confidence `conf`.
//
// Alternatives the author left available (swap the expression to use one):
//   logistic loss:                 log(1+exp(-2.0*conf))
//   least square regression loss:  (1-conf)*(1-conf)
//   ADABOOST (exponential) loss:   exp(-conf)
// Active choice, labelled "SVM loss" by the author: the linear 1-conf.
// Note that the result is not clamped at 0.
inline double LossFunction(const double &conf)
{
    const double loss=1-conf;
    return loss;
}

// Classifies `features` by loss-based decoding of the pairwise classifiers.
//
// The table rr holds one classifier per class pair (first, second) with
// first < second, in the order the two nested loops below visit them. Each
// classifier's confidence is turned into a margin in favour of `first`
// (negated when it answered anything else); `first` is charged
// LossFunction(margin) and `second` LossFunction(-margin). The class with
// the smallest accumulated loss wins; on equal losses the highest index wins.
//
//   confidence - optional output (may be NULL), derived from the winning
//                loss; the formula only holds for the SVM loss function.
//
// Returns the winning class number (index + MinClass).
ClassNo RRBAGFSC45::eval(double *features,double *confidence)
{
    Classifier **duel=rr;
    ClassNo first, second, winner, best=0;
    double margin, lowest=INF;

    memset(vote,0,MaxClass*sizeof(double));

    for (first=0; first<MaxClass; first++)
    {
        for (second=first+1; second<MaxClass; second++)
        {
            winner=(*duel)->eval(features, &margin)-MinClass;
            duel++;

            // Unweighted voting would be: vote[winner]--; continue;

            // Weighted voting (based on the loss function).
            if (winner!=first) margin=-margin;
            vote[first]+=LossFunction(margin);
            vote[second]+=LossFunction(-margin);
        }
    }

    for (first=0; first<MaxClass; first++)
    {
        if (vote[first]<=lowest)
        {
            lowest=vote[first];
            best=first;
        }
    }

    if (confidence!=NULL)
    {
        // the equation is only working for SVM loss function:
        *confidence=mmax(0,1-lowest/(MaxClass-1));
    }
    return best+MinClass;   // the calculated class
}

// Loads a round-robin ensemble from `stream`.
//
// Reads two text lines (MinClass, then MaxClass) and then
// MaxClass*(MaxClass-1)/2 pairwise classifiers, each created by
// NewClassifier(stream). The title line that save() writes first is not read
// here; presumably the caller consumes it.
//
// Terminates the process when the header is truncated or memory runs out.
RRBAGFSC45::RRBAGFSC45(FILE *stream)
{
    Classifier **b;
    int i;
    char buffer[300],*tline=buffer;

    // fgets() returns NULL on end-of-file or read error. Parsing the buffer
    // in that case would turn stale or uninitialized bytes into a class count.
    if (fgets(tline,300,stream)==NULL)
    {
        fprintf(stderr,"RRBAGFSC45: unexpected end of file while reading MinClass.\n");
        exit(EXIT_FAILURE);
    }
    MinClass=atol(tline);

    if (fgets(tline,300,stream)==NULL)
    {
        fprintf(stderr,"RRBAGFSC45: unexpected end of file while reading MaxClass.\n");
        exit(EXIT_FAILURE);
    }
    MaxClass=atol(tline);

    // One classifier per unordered pair of classes.
    nClassifier=MaxClass*(MaxClass-1)/2;
    b=rr=(Classifier**)calloc(nClassifier,sizeof(Classifier*));
    // A zero-sized request may legitimately return NULL, so only a failed
    // non-empty allocation is treated as out of memory.
    if ((nClassifier>0)&&(rr==NULL))
    {
        fprintf(stderr,"RRBAGFSC45: out of memory.\n");
        exit(210);
    }
    for (i=0; i<nClassifier; i++)
    {
        *b=NewClassifier(stream);
        b++;
    }

    // One loss accumulator per class.
    vote=(double*)malloc(MaxClass*sizeof(double));
    if ((MaxClass>0)&&(vote==NULL))
    {
        fprintf(stderr,"RRBAGFSC45: out of memory.\n");
        exit(210);
    }
}

#ifndef __EVAL_ONLY__

// Trains one binary classifier per pair of classes of D.
//
// Author's notes on the parameters, which are forwarded to every BAGFSC45:
//   if K==0 then calculation of optimal K is performed
//   if B==0 then calculation of optimal B is performed
//   if ((K==D->MaxAtt)&&(B==1)), we have BAGFSC45 := simple C45
//
// For each pair (first, second) with first < second a two-class dataset is
// derived from D with generate_Binary_Set(), used for training and deleted.
RRBAGFSC45::RRBAGFSC45(DataSet *D, int K, int B)
{
    MinClass=D->MinClass;
    MaxClass=D->MaxClass;

    vote=(double*)malloc(MaxClass*sizeof(double));
    nClassifier=MaxClass*(MaxClass-1)/2;
    rr=(Classifier**)calloc(nClassifier,sizeof(BAGFSC45*));

    Classifier **slot=rr;
    for (int first=0; first<MaxClass; first++)
    {
        for (int second=first+1; second<MaxClass; second++)
        {
            DataSet *pairSet=D->generate_Binary_Set(first,second);

            // Any classifier can be plugged in here. BAGFS is the active one:
            *slot=new BAGFSC45(pairSet,K,B);
            // Disabled: reuse the K / B chosen by the first pair for the others
            //   if ((first==0)&&(second==1)&&(K==0)) K=((BAGFSC45*)*slot)->K;
            //   if ((first==0)&&(second==1)&&(B==0)) B=((BAGFSC45*)*slot)->B;
            // Boosting alternative:
            //   *slot=new BoostedC45(pairSet,B);

            slot++;
            delete pairSet;
        }
    }
}

// Serializes the ensemble to f: a title line, MinClass, MaxClass and then
// every pairwise classifier through its own save(), flushing f after each.
// Prints one '.' per classifier and a final newline on stdout.
void RRBAGFSC45::save(FILE *f)
{
    fprintf(f,"Round Robin BAGFS Trees v1.00 (c) Vanden Berghen Frank\n%i\n%i\n",
            MinClass, MaxClass);

    int idx=0;
    while (idx<nClassifier)
    {
        printf(".");
        rr[idx]->save(f);
        fflush(f);
        ++idx;
    }

    printf("\n");
}

#endif

// Releases what the stacked layer owns: the scratch block that starts at
// distClass and the binary dataset. The pairwise classifiers and the vote
// buffer are released by ~RRBAGFSC45(), which runs after this body.
StackedRRBAGFSC45::~StackedRRBAGFSC45()
{
    // ~RRBAGFSC45() also frees `name` when it is non-null. Clear the pointer
    // after freeing it here so the string is not freed a second time.
    if (name)
    {
        free(name);
        name=NULL;
    }
    free(distClass);
    delete bds;
}

// Classifies `features` by nearest-neighbour lookup in the binary dataset.
//
// The answers of the pairwise classifiers are packed into binaryVector and
// compared, through bds->distance(), with every stored vector. Among the
// stored entries at the smallest distance, the class with the largest total
// occurrence count wins; on equal counts the highest class index wins and a
// '!' is printed on stdout.
//
//   confidence - optional output (may be NULL): share of the occurrences at
//                the smallest distance that belong to the winning class.
//
// Returns the winning class number (index + MinClass); MinClass when no
// stored entry qualifies.
ClassNo StackedRRBAGFSC45::eval(double *features,double *confidence)
{
    int nItem=bds->nItem,dist, lsize=bds->lineSize, 
        bestDist=maxINT,bvote2=0, j, nclass=MaxClass;
    unsigned char *dd=bds->d;
    // best starts at 0 so the return value is defined even when the loop
    // below never selects a class.
    ClassNo *cc=bds->c, best=0, i;
    int *oo=bds->o;
    
    for(i=0; i<MaxClass; i++) distClass[i]=maxINT;
    // Start from clean counters: the reset inside the scan only happens when
    // some stored entry is closer than maxINT.
    memset(occurence,0,MaxClass*sizeof(int));
    constructBinaryResponse(features, binaryVector);
    for (j=0; j<nItem; j++)
    {
        dist=bds->distance(binaryVector,dd);
        if (dist<bestDist)  
        {
            // Strictly closer entry: forget the counts gathered so far.
            memset(occurence,0,MaxClass*sizeof(int)); 
            bestDist=dist;
            nclass=MaxClass;
        }
        if (dist==bestDist) 
        {
            distClass[*cc]=dist;
            occurence[*cc]+=*oo;
            // Stop after MaxClass exact matches (presumably at most one
            // stored entry per class shares a vector -- confirm in
            // BinaryDataSet).
            if ((bestDist==0)&&((--nclass)==0)) break;
        }
        oo++; cc++; dd+=lsize;
    }
    
    // j becomes the total number of occurrences at the smallest distance.
    j=0;
    for (i=0; i<MaxClass; i++) 
    {
        j+=occurence[i];
        if ((distClass[i]==bestDist)&&(occurence[i]>=bvote2)) 
        { 
            if (bvote2==occurence[i]) { printf("!"); fflush(stdout); }
            bvote2=occurence[i]; 
            best=i; 
        }
    }

    if (confidence) 
    {
        if (j==bvote2) *confidence=1.0;
        // Divide in floating point: both operands are ints, and an integer
        // quotient would report 0 for every non-unanimous result.
        else *confidence=((double)bvote2)/j;
    }
    return best+MinClass;
}

// Allocates the scratch memory used by eval() as one block laid out as
//   distClass[MaxClass] | occurence[MaxClass] | binaryVector[...]
// Only distClass must be freed. Terminates the process when memory runs out.
void StackedRRBAGFSC45::initMemory()
{
    // One bit per pairwise classifier, plus room for the byte that
    // constructBinaryResponse() zeroes each time it completes a byte. When
    // nClassifier is a multiple of 8 that happens once more after the last
    // bit, so nClassifier/8+1 bytes are needed rather than
    // (nClassifier-1)/8+1.
    const size_t vectorBytes=(size_t)(nClassifier/8+1);
    const size_t counterBytes=2*MaxClass*sizeof(int);

    distClass=(int*)malloc(counterBytes+vectorBytes);
    if (distClass==NULL)
    {
        fprintf(stderr,"Stacked RRBAGFSC45: out of memory.\n");
        exit(210);
    }
    occurence=distClass+MaxClass;
    binaryVector=(unsigned char*)(occurence+MaxClass);
}

void StackedRRBAGFSC45::constructBinaryResponse(double *feature, unsigned char *d)
{
    Classifier **r=rr;
    int current=1, mc=MaxClass, j,k;
    *d=0;
    for (j=0; j<mc; j++)
    {
        for (k=j+1; k<mc; k++)
        {
            if ((*(r++))->eval(feature,NULL)==j+MinClass) { *d|=current; };
            current<<=1;
            if (current==256) { current=1; *(++d)=0; }
        }
    }
}

// Loads a complete stacked model from `stream`: first the round-robin
// ensemble (base-class loader), then the binary dataset, and finally
// allocates the scratch buffers used by eval().
StackedRRBAGFSC45::StackedRRBAGFSC45(FILE *stream)
    : RRBAGFSC45(stream)
{
    this->bds=new BinaryDataSet(stream);
    this->initMemory();
}


#ifndef __EVAL_ONLY__

// Loads the round-robin ensemble from f (base-class loader), then builds the
// binary dataset from D through finishInit().
StackedRRBAGFSC45::StackedRRBAGFSC45(DataSet *D, FILE *f)
    : RRBAGFSC45(f)
{
    this->finishInit(D);
}

// Trains the round-robin ensemble on D (base-class constructor, which
// receives K and B unchanged), then builds the binary dataset from the same
// D through finishInit().
StackedRRBAGFSC45::StackedRRBAGFSC45(DataSet *D, int K, int B)
    : RRBAGFSC45(D,K,B)
{
    this->finishInit(D);
}

// Builds the binary dataset used by eval(): every case of D is run through
// the pairwise classifiers and its packed response is added to bds together
// with the case's class label (the ClassNo stored at index D->MaxAtt of the
// row). Prints a '.' on stdout for every 500th case, starting with case 0.
void StackedRRBAGFSC45::finishInit(DataSet *D)
{
    printf("Generating binary dataset (1 point=500 cases).\n");
    initMemory();

    const int caseCount=D->nItem;
    const int classSlot=D->MaxAtt;
    bds=new BinaryDataSet(D);

    int idx=0;
    while (idx<caseCount)
    {
        double *row=D->Item[idx];
        constructBinaryResponse(row, binaryVector);
        bds->add(binaryVector,(*((ClassNo*)(row+classSlot))));
        if ((idx%500)==0)
        {
            printf(".");
            fflush(stdout);
        }
        ++idx;
    }

    printf("\nfinished classifying dataset.\n");
}

void StackedRRBAGFSC45::save(FILE *stream)
{
    fprintf(stream,"Stacked Round Robin BAGFS Trees v1.00 (c) Vanden Berghen Frank\n");
    RRBAGFSC45::save(stream);
    bds->save(stream);
}

#endif
