/***************************************
 * copyright (c) Vanden Berghen Frank  *
 * V 1.2                               *
 * *************************************/

/*************************************************************************/
/*                                      */
/*    Central tree-forming algorithm    */
/*    ---------------------------------------------------------          */
/*                                      */
/*************************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include <math.h>
#include "C45.h"
//#include <crtdbg.h>

/* Data set being processed; installed by BuildTree.  Item[i] points to the
   row of case i (see the accessor macros below for the row layout). */
static double **Item;
/* Number of cases in Item[]; InitialiseLocalTreeVar also compares against it
   to decide whether the per-item buffers must be re-allocated. */
static ItemNo MaxItem=0;
/* FS[a] non-zero means attribute a is evaluated as a split candidate in FormTree. */
static Boolean *FS;
static Attribute MaxAtt;	/* number of attributes; attribute indices run 0..MaxAtt-1 */
/* Number of classes; class indices run 0..MaxClass-1. */
static ClassNo	 MaxClass;
/* When non-zero, frequencies are accumulated from weight(i) instead of
   counting each case as 1. */
Boolean bweight=0;
/* Defined in another translation unit; used here as the minimum number of
   items required on each side of a split. */
extern ItemNo  MINOBJS;
/* Row layout: slots 0..MaxAtt-1 hold the attribute values, slot MaxAtt is
   reinterpreted as a ClassNo and slot MaxAtt+1 as an ItemCount weight. */
#define  CVal(Case,Attribute)   Item[Case][Attribute]
#define  Class(Case)			(*((ClassNo*)(Item[Case]+MaxAtt)))
#define  weight(Case)			(*((ItemCount*)(Item[Case]+MaxAtt+1)))

/*
 *  Debug helper: print the first two attribute values and the class of
 *  items Fp..Lp-1, waiting for a key press after every 20 lines.
 *
 *  The dump is compiled in only when C45_DEBUG_DUMP is defined; otherwise
 *  the function does nothing, which is what the previous version did (its
 *  body began with an unconditional return).  The dump reads attribute
 *  columns 0 and 1, so it needs at least two attributes per item.
 */
void affiche(int Fp, int Lp)
{
#ifdef C45_DEBUG_DUMP
	int i;
	int shown = 0;	/* lines printed since the last pause */

	printf("from %i to %i:\n",Fp,Lp);
	for (i=Fp; i<Lp; i++)
	{
		printf ("   %f %f %i\n",CVal(i,0),CVal(i,1),Class(i));
		/* the page counter is kept apart from the loop index so that
		   no row is skipped and the loop always terminates */
		if (++shown == 20) { getchar(); shown = 0; }
	}
#else
	(void)Fp;
	(void)Lp;
#endif
}

/*************************************************************************/
/*                                                                    */
/*        Exchange items at a and b                  */
/*                                     */
/*************************************************************************/

/*  Exchange the row pointers stored at positions a and b of Item[];
    the rows themselves are not copied.  */
static void Swap(ItemNo a,ItemNo b)
{
    double *rowA = Item[a];
    double *rowB = Item[b];

    Item[a] = rowB;
    Item[b] = rowA;
}

/********************************************************/
/*														*/
/*    Sort items from Fp to Lp on attribute a           */
/*														*/
/********************************************************/

/*
 *  Sort the items in the half-open range [Fp, Lp) into ascending order of
 *  attribute Att by rearranging the row pointers in Item[].
 *
 *  The value of the last item of the range is used as the pivot.  Items
 *  equal to the pivot are gathered next to it so that only the strictly
 *  smaller and the strictly larger items are sorted recursively.
 */
void Quicksort(ItemNo Fp, ItemNo Lp, Attribute Att)
{
    ItemNo last, split, eq, k;
    double pivot;

    /*  Fewer than two items: nothing to do  */
    if ( Fp >= Lp-1 ) return;

    last  = Lp - 1;
    pivot = CVal(last, Att);

    /*  Pass 1: move every item with value <= pivot to the front;
        on exit 'split' is the first position holding a larger value  */
    split = Fp;
    k = Fp;
    while ( k < last )
    {
        if ( CVal(k, Att) <= pivot )
        {
            if ( k != split ) Swap(split, k);
            split++;
        }
        k++;
    }

    /*  Pass 2: walk back through the front part and pack the items equal
        to the pivot against 'split'; 'eq' ends on the last smaller item  */
    eq = split - 1;
    k = eq;
    while ( k >= Fp )
    {
        if ( CVal(k, Att) == pivot )
        {
            if ( k != eq ) Swap(eq, k);
            eq--;
        }
        k--;
    }

    /*  Strictly smaller items  */
    Quicksort(Fp, eq + 1, Att);

    /*  Drop the pivot item just after the run of equal values  */
    Swap(split, last);

    /*  Strictly larger items  */
    Quicksort(split + 1, Lp, Att);
}

/*************************************************************************/
/*                                     */
/*    Construct a leaf               */
/*                                     */
/*************************************************************************/

/*
 *  Confidence attached to a node that misclassifies 'error' of its
 *  'nItems' items: one minus the smoothed error rate (error+1)/(nItems+2).
 *  The smoothing keeps the result defined for an empty node (0.5) and
 *  strictly below 1 for an error-free one.
 */
double confidenceLevel(double error, double nItems)
{
    const double smoothedErrorRate = (error + 1.0) / (nItems + 2.0);

    return 1.0 - smoothedErrorRate;
}

/*
 *  Allocate and initialise a leaf node.
 *
 *  ClassFreq  class distribution of the items at the node (MaxClass
 *             entries); it is copied, the caller keeps its own array
 *  NodeClass  class the leaf predicts
 *  Cases      number (or total weight) of items at the node
 *  Errors     number (or total weight) of items not of class NodeClass
 *
 *  Returns the new node, owned by the caller, or NULL when memory could
 *  not be allocated.
 */
Node *Make_Leaf(ItemCount *ClassFreq, ClassNo NodeClass,
                ItemCount Cases, double Errors)
{
    Node *N = (Node *) malloc(sizeof(Node));

    if ( N == NULL ) return NULL;

    N->ClassDist = (ItemCount *) calloc(MaxClass, sizeof(ItemCount));
    if ( MaxClass > 0 )
    {
        if ( N->ClassDist == NULL )
        {
            /*  Do not hand out a half-built node  */
            free(N);
            return NULL;
        }
        memcpy(N->ClassDist, ClassFreq, MaxClass * sizeof(ItemCount));
    }

    N->NodeType    = LeafType;
    N->Leaf        = NodeClass;
    N->Items       = Cases;
    N->Errors      = Errors;
    N->lower       = NULL;
    N->upper       = NULL;
    N->Confidence  = confidenceLevel(N->Errors,N->Items);
    return N;
}

/***************************************/
/*						               */
/*    Change a leaf to a node          */
/*                                     */
/***************************************/


/*
 *  Turn leaf N into a threshold test on attribute Att.
 *
 *  The cut stored in the node is not Bar itself but the greatest value of
 *  Att that does not exceed Bar.  The search covers all MaxItem items of
 *  the data set, not only those of the current partition.  If no item
 *  has a value <= Bar the cut is left at 0.
 */
void Change_To_Node(Node *N, Attribute Att, double Bar)
{
    ItemNo row;
    double cut = 0;
    Boolean found = false;

    for ( row = 0 ; row < MaxItem ; row++ )
    {
        double value = CVal(row, Att);

        /*  Only values on the low side of the threshold are candidates  */
        if ( !(value <= Bar) ) continue;

        if ( !found || value > cut )
        {
            cut   = value;
            found = true;
        }
    }

    N->Cut      = cut;
    N->Tested   = Att;
    N->NodeType = ThreshContin;
}

/*************************************************************************/
/*                                     */
/*  Compute the total information in V[ 0..MaxVal-1 ]            */
/*                                     */
/*************************************************************************/

/*
 *  Total information of the frequency vector V[0..MaxVal-1]:
 *
 *      T * Log(T) - sum over k of V[k] * Log(V[k]),   T = sum of V[k]
 *
 *  The result is not divided by T; callers do that themselves.
 */
double TotalInfo(ItemCount *V, int MaxVal)
{
    ItemCount count, total = 0;
    double weighted = 0.0;
    int k = 0;

    while ( k < MaxVal )
    {
        count = V[k];
        weighted += (double)(count * Log(count));
        total += count;
        k++;
    }

    return (double)(total * Log(total) - weighted);
}


/******************************************************************************/
/*																			  */
/*  This routine finds the best cut on attribute Att for items Fp through	  */
/*  Lp and sets Info, Gain and Bar											  */
/*																			  */
/******************************************************************************/

// The 4 following variables are logically local to EvalContinuousAtt.
// They are allocated once by InitialiseLocalTreeVar and reused on every
// call to avoid repeated allocation.
ItemCount
    **Freq,        /* Freq[x][c] = no. items of class c on side x of the candidate cut (x=0: up to the cut, x=1: after it) */
    *ValFreq;	   /* ValFreq[x] = no. items on side x of the candidate cut (x=0,1) */

double
    *SplitGain,    /* SplitGain[i] = gain of cutting between items i and i+1, or -Epsilon if that cut is not allowed */
    *SplitInfo;    /* SplitInfo[i] = split information of the same cut */

/*
 *  Evaluate attribute Att as a threshold test on the items Fp..Lp-1.
 *
 *  Att        attribute to evaluate
 *  Items      number of items in the range (total weight when bweight is set)
 *  Fp, Lp     half-open item range [Fp, Lp); sorted on Att as a side effect
 *  BaseInfo   information of the class distribution before any split
 *  ClassFreq  class frequencies of the items in the range
 *  Bar        out: midpoint threshold of the best cut; written only when
 *             a worthwhile cut exists
 *  Gain       out: gain of the best cut less the threshold cost, or
 *             -Epsilon when there is none
 *  Info       out: split information of the best cut, or 0 when there is none
 *
 *  Bar, Gain and Info each point at a single value.  Works in the
 *  pre-allocated arrays Freq, ValFreq, SplitGain and SplitInfo.
 */
void EvalContinuousAtt(Attribute Att, ItemCount Items, ItemNo Fp, ItemNo Lp,
					   double BaseInfo, ItemCount *ClassFreq,
					   double *Bar,double *Gain,double *Info)
{
    ItemNo i, BestI, Tries=0;
    ItemCount MinSplit;
    ClassNo c;
    double Val, BestVal, ThreshCost, ThisInfo;

    if ( Items < 2 * MINOBJS )
    {
        Verbosity(2) printf("\tinsufficient cases.\n");
        *Gain = -Epsilon;
        *Info = 0.0;
        return;
    }

    Quicksort(Fp, Lp, Att);

    /*  Try possible cuts between items i and i+1, and determine the
        information and gain of the split in each case.  We have to be wary
        of splitting a small number of items off one end, as we can always
        split off a single item, but this has little predictive power.  */

    MinSplit = 0.10 * Items / (MaxClass + 1);
    if ( MinSplit < MINOBJS ) MinSplit = MINOBJS;
    else if ( MinSplit > 25 ) MinSplit = 25;

    /*  Start with every item on the upper side of the cut  */
    for (c=0; c<MaxClass; c++) { Freq[0][c] = 0; Freq[1][c] = ClassFreq[c]; }

    ValFreq[0] = 0; ValFreq[1] = Items;
    for (i=Fp; i<Lp; i++)
    {
        /*  Move item i from the upper to the lower side  */
        c = Class(i);

        if (bweight)
        {
            ValFreq[0] += weight(i);
            ValFreq[1] -= weight(i);
            Freq[0][c] += weight(i);
            Freq[1][c] -= weight(i);
        }
        else
        {
            ValFreq[0] ++;
            ValFreq[1] --;
            Freq[0][c] ++;
            Freq[1][c] --;
        }

        /*  Too few items below the cut so far  */
        if ( ValFreq[0] < MinSplit ) { SplitGain[i] = -Epsilon; continue; }

        /*  Too few items left above the cut: no later cut can qualify  */
        if ( ValFreq[0] > Items - MinSplit )
        {
            while (i<Lp) { SplitGain[i] = -Epsilon; i++; }
            break;
        }

        /*  A cut needs a successor inside the range whose value is
            distinctly larger; the bound check keeps CVal(i+1, ...) from
            reading past the last item  */
        if ( i+1 < Lp && CVal(i,Att) < CVal(i+1,Att) - 1E-5 )
        {
            /*  Total info after the split: sum of the info of the two
                subsets formed by the test  */
            ThisInfo = (TotalInfo(Freq[0], MaxClass)+
                        TotalInfo(Freq[1], MaxClass))/Items;

            SplitGain[i] = BaseInfo - ThisInfo;
            SplitInfo[i] = TotalInfo(ValFreq, 2) / Items;
            Tries++;

            Verbosity(3)
            {    printf("\t\tCut at %.3f  (gain %.3f, val %.3f):",
                       ( CVal(i,Att) + CVal(i+1,Att) ) / 2,
                       SplitGain[i],
                       ThisInfo);
            }
        }
        else
        {
            SplitGain[i] = -Epsilon;
            SplitInfo[i] = 0;
        }
    }

    /*  Find the best cut; every candidate is charged the cost of having
        chosen one threshold among Tries possibilities  */

    ThreshCost = Log(Tries) / Items;

    BestVal = 0;
    BestI   = None;
    for (i=Fp; i<Lp; i++)
    {
        Val = SplitGain[i] - ThreshCost;
        if ( Val > BestVal )
        {
            BestI   = i;
            BestVal = Val;
        }
    }

    /*  If a test on the attribute is able to make a gain,
        set the best break point, gain and information  */

    if ( BestI == None )
    {
        *Gain = -Epsilon;
        *Info = 0.0;

        Verbosity(2) printf("\tno gain\n");
    }
    else
    {
        *Bar  = (CVal(BestI,Att) + CVal(BestI+1,Att)) / 2;
        *Gain = BestVal;
        *Info = SplitInfo[BestI];

        /*  Bar, Info and Gain already point at this attribute's slot,
            so they are dereferenced rather than indexed by Att again  */
        Verbosity(2)
            printf("\tcut=%.3f, inf %.3f, gain %.3f\n",
               *Bar, *Info, *Gain);
    }
}

/*************************************************************************/
/*                                      */
/*  Build a decision tree for the cases Fp through Lp:              */
/*                                      */
/*  - if all cases are of the same class, the tree is a leaf and so     */
/*      the leaf is returned labelled with this class              */
/*                                      */
/*  - for each attribute, calculate the potential information provided      */
/*    by a test on the attribute (based on the probabilities of each     */
/*    case having a particular value for the attribute), and the gain     */
/*    in information that would result from a test on the attribute     */
/*    (based on the probabilities of each case with a particular     */
/*    value for the attribute being of a particular class)         */
/*                                      */
/*  - on the basis of these figures, and depending on the current     */
/*    selection criterion, find the best attribute to branch on.      */
/*    Note:  this version will not allow a split on an attribute     */
/*    unless two or more subsets have at least MINOBJS items.      */
/*                                      */
/*  - try branching and test whether better than forming a leaf          */
/*                                      */
/*************************************************************************/


// The 5 following variables are logically local to FormTree.
// They are allocated once by InitialiseLocalTreeVar and reused on every
// call to avoid repeated allocation.  Recursive FormTree calls overwrite
// them.
ItemCount
    *ClassFreq;    /* ClassFreq[c] = no. items of class c */
double
    *Bar,          /* Bar[a]  = best threshold for contin att a */
    *Gain=NULL,    /* Gain[a] = info gain by split on att a; a NULL pointer also tells InitialiseLocalTreeVar that the tables are not allocated yet */
    *Info;         /* Info[a] = potential info of split on att a */
short
    *Tested;       /* Tested[a] is incremented while FormTree is splitting on att a and decremented afterwards */

/*  Release a subtree built by FormTree.  Each node owns its ClassDist
    array and its lower/upper children; nothing else is allocated for a
    node in this file.  */
static void DiscardSubtree(Node *T)
{
    if ( T == NULL ) return;

    DiscardSubtree(T->lower);
    DiscardSubtree(T->upper);
    free(T->ClassDist);
    free(T);
}

/*
 *  Build a decision tree for the items in the half-open range [Fp, Lp).
 *
 *  The items of the range are reordered in Item[] while the tree is grown.
 *  The work arrays ClassFreq, Gain, Info and Bar are shared with the
 *  recursive calls, so their contents are only valid up to the recursion.
 *
 *  Returns the root of the (sub)tree, owned by the caller, or NULL when a
 *  node could not be allocated; in that case everything built for this
 *  range has already been released.
 */
Node *FormTree(ItemNo Fp, ItemNo Lp) // Fp=first position ; Lp=last position
{
    ItemNo i, Ep;
    ItemCount Cases=0.0, NoBestClass;
    double BestVal, Val, AvGain=0, BaseInfo, Thresh;
    Attribute Att, BestAtt, Possible=0;
    ClassNo c, BestClass;
    Node *N;

    /*  Generate the class frequency distribution  */
    if (bweight)
    {
        for (c=0;  c<MaxClass; c++) ClassFreq[c] = 0.0;
        for (i=Fp; i<Lp;       i++)
        {
            Cases += weight(i);
            ClassFreq[ Class(i) ] += weight(i);
        }
    }
    else
    {
        Cases = Lp-Fp;
        for (c=0;  c<MaxClass; c++) ClassFreq[c] = 0;
        for (i=Fp; i<Lp;       i++) ClassFreq[ Class(i) ]++;
    }

    /*  Find the most frequent class  */

    BestClass = 0;
    for (c=1; c<MaxClass; c++)
        if (ClassFreq[c]>ClassFreq[BestClass]) BestClass = c;
    NoBestClass = ClassFreq[BestClass];

    /*  Every node starts life as a leaf predicting the majority class  */
    N = Make_Leaf(ClassFreq, BestClass, Cases, Cases - NoBestClass);
    if ( N == NULL ) return NULL;

    /*  If all cases are of the same class or there are not enough
        cases to divide, the tree is a leaf  */

    if ( NoBestClass == Cases  || Cases < 2 * MINOBJS )
    {
        return N;
    }

    BaseInfo = TotalInfo(ClassFreq, MaxClass) / Cases;

    /*  For each selected attribute, find the information and gain  */
    for (Att=0; Att<MaxAtt; Att++)
    {
        Gain[Att] = -Epsilon;
        if (FS[Att])
        {
            EvalContinuousAtt(Att, Cases, Fp, Lp, BaseInfo, ClassFreq,
                              Bar+Att, Gain+Att, Info+Att);

            /*  Update average gain over the attributes that offer a cut  */
            if ( Gain[Att] > -Epsilon )
            {
                Possible++; AvGain += Gain[Att];
            }
        }
    }

    AvGain  = ( Possible ? AvGain / Possible : 1E6 );
    Verbosity(2)
    {
        if ( AvGain < 1E6 ) printf("\taverage gain %.3f\n", AvGain);
    }

    /*  Find the best attribute: highest gain ratio among the attributes
        whose gain is at least average  */

    BestVal = -Epsilon;
    BestAtt = None;
    for (Att=0; Att<MaxAtt; Att++)
    {
        if (( Gain[Att] > -Epsilon )&&
            ( Gain[Att] >= AvGain - Epsilon )&&
            ( Info[Att] > Epsilon )
           )
        {
            Val = Gain[Att]/Info[Att];
            if ( Val > BestVal )
            {
                BestAtt = Att;
                BestVal = Val;
            }
        }
    }

    /*  Decide whether to branch or not  */

    if ( BestAtt != None )
    {
        Verbosity(1)
        {
            printf(" inf %.3f gain %.3f val %.3f\n",
                   Info[BestAtt], Gain[BestAtt], BestVal);
        }

        /*  Build a node of the selected test (continuous attribute)  */
        Change_To_Node(N, BestAtt, Bar[BestAtt]);

        /*  Recursive divide and conquer  */

        ++Tested[BestAtt];

        Verbosity(1) affiche(Fp,Lp);

        /*  Group the items with value <= cut at the front of the range;
            Ep ends on the first item of the upper group  */
        Thresh = N->Cut;
        Ep = Fp;
        while ((Ep<Lp)&&(CVal(Ep,BestAtt)<=Thresh)) Ep++;
        for (i=Ep+1; i<Lp; i++)
            if (CVal(i, BestAtt)<=Thresh) { Swap(Ep, i); Ep++; }

        if ((Ep>Fp)&&(Ep<Lp))
        {
            N->lower = FormTree(Fp, Ep);
            if ( N->lower != NULL ) N->upper = FormTree(Ep, Lp);

            if ( N->lower == NULL || N->upper == NULL )
            {
                /*  Out of memory below this node: give up on the whole
                    subtree rather than dereference a missing child  */
                --Tested[BestAtt];
                DiscardSubtree(N);
                return NULL;
            }

            N->Errors = N->lower->Errors + N->upper->Errors;
        }
        else
        {
            /*  The cut left one side empty: fall back to a leaf.
                NOTE(review): Items and Errors are zeroed after the
                confidence is computed, as in the previous version;
                confirm that this is intended.  */
            N->NodeType    = LeafType;
            N->Confidence  = confidenceLevel(N->Errors,N->Items);
            N->Items       = 0;
            N->Errors      = 0;
            N->lower       = NULL;
        }

        --Tested[BestAtt];

        /*  See whether we would have been no worse off with a leaf  */

        if ( N->Errors >= Cases - NoBestClass - Epsilon )
        {
            /*  [Fp, Lp) is half-open, so it holds Lp - Fp items  */
            Verbosity(1)
            printf("Collapse tree for %d items to leaf\n",
                        (int)(Lp - Fp));

            /*  NOTE(review): the children stay attached to the collapsed
                node and Errors keeps the subtree total, as before.  */
            N->NodeType    = LeafType;
            N->Confidence  = confidenceLevel(N->Errors,N->Items);
        }
    }
    else
    {
        Verbosity(1)
            printf("\tno sensible splits  %.1f/%.1f\n",
               Cases, Cases - NoBestClass);
    }

    return N;
}

/*************************************************************************/
/*                                      */
/*        Allocate space for tree tables                  */
/*                                      */
/*************************************************************************/

void InitialiseLocalTreeVar(int m)
/*  ------------------  */
{ 
	int v;

	if (m>MaxItem)
	{
		if (SplitGain) 
		{	
			free(SplitGain);
			free(SplitInfo);
		};
		SplitGain = (double *) calloc(m, sizeof(double));
		SplitInfo = (double *) calloc(m, sizeof(double));
	};

	if (!Gain)
	{
		Tested    = (short *) calloc(MaxAtt, sizeof(short));

		Gain    = (double *) calloc(MaxAtt, sizeof(double));
		Info    = (double *) calloc(MaxAtt, sizeof(double));
		Bar        = (double *) calloc(MaxAtt, sizeof(double));

		Freq  = (ItemCount **) calloc(2, sizeof(ItemCount *));
		for(v=0; v<2; v++)
		{
			Freq[v]  = (ItemCount *) calloc(MaxClass, sizeof(ItemCount));
		};

		ValFreq = (ItemCount *) calloc(2, sizeof(ItemCount));
		ClassFreq = (ItemCount *) calloc(MaxClass, sizeof(ItemCount));
	};
};

void freeLocalVarForBuild()
{
	if (!Gain)
	{
		free(SplitGain);
		free(SplitInfo);
		free(Tested);
		free(Gain);
		free(Info);
		free(Bar);
		free(Freq);
		free(ValFreq);
		free(ClassFreq);
	};
};

/*
 *  Entry point: build a decision tree for a data set.
 *
 *  _Item      array of _MaxItem row pointers (layout as read by the
 *             CVal / Class / weight macros)
 *  _MaxItem   number of items
 *  _MaxClass  number of classes
 *  _MaxAtt    number of attributes
 *  _FS        per-attribute flags; only flagged attributes are evaluated
 *  _bweight   non-zero to use the per-item weights
 *
 *  The arguments are stored in file-level variables for the duration of
 *  the build.  Returns whatever FormTree returns for the full item range.
 */
Node *BuildTree(double **_Item, int _MaxItem, int _MaxClass, 
				int _MaxAtt, Boolean *_FS, Boolean _bweight)
{
	/* describe the data set to the rest of the file */
	Item     = _Item;
	FS       = _FS;
	bweight  = _bweight;
	MaxClass = _MaxClass;
	MaxAtt   = _MaxAtt;

	/* the work arrays are sized while MaxItem still holds the previous
	   item count, so MaxItem is updated only afterwards */
	InitialiseLocalTreeVar(_MaxItem);
	MaxItem = _MaxItem;

	return FormTree(0,MaxItem);
}

