Data Structures and Algorithms/Sorting Data/Bucket Sort in C

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  
 *   This is a demo program to show how good BucketSort can be.            *
 *                                                                         *
 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *
 *   Copyright (C) 2010 by Ingenieurbüro Henning, Kiel, Germany            *
 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published     *
 *   by the Free Software Foundation.                                      *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <stdio.h>

#define MAXKEYS    100000   // max. number of data records
#define MAXWLNG        25   // max. length of the words in the data record
#define MAXBUCKETS     20   // max. number of buckets

struct FiAd       // register of buckets with the star-points in linked lists
  {
  int  StartIndex;// element in linked list where the bucket starts
  int  LastIndex; // last element of the bucket (to find it fast)
  int  NumInFifo; // number of references in the fifo
  };

struct FiFo       // structure of the linked lists used as bucket
  {
  int  PosIndex;  // where in the table can this element be found
  int  Next;      // pointer to next element in this bucket
  };


struct DataRec    // the structure of the test data
  {
  char   TheChar;
  short  TheShint;
  int    TheInt;
  float  TheFloat;
  double TheDouble;
  int    ThePos;
  int    TheLength;
  char   TheKey[MAXWLNG + 1];
  };

// local subroutines
void BuckSortInt (struct DataRec* Source, struct FiFo* InFif, int Start,
                 int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr);
void WriteText   (char * TextName);

struct DataRec DataRecs   [MAXKEYS];          // the input list (unsorted)
struct DataRec SortedRecs [MAXKEYS];          // the sorted output list
int    NumRecs;                               // number of data records

// the input and output files; you have to adapt them to your needs
char In_ListN_1 [40] = "/home/user/EinList.bin";
char OutTextT_2 [40] = "/home/user/BucketSort.txt";


int main(int argc, char *argv[])
  {
  struct FiFo Positions  [MAXKEYS];           // the sequence of records
  int         NewPosis   [MAXKEYS];           // where it is now
  int         RecLng = sizeof(struct DataRec);// size of one data record
  int         PosNr  = 0;                     // position to store result
  int         I, LastInt, NoBuckets;
  FILE        *fp;

  NumRecs   = 100000;                         // number of records to sort
  NoBuckets = 7;                              // number of recursion levels

  // we get the input data which we want to sort
  fp = fopen(In_ListN_1, "rb");
  if(fp != NULL)
    {
    fread(DataRecs, NumRecs * sizeof(struct DataRec), 1, fp);
    fclose(fp);
    }
   else
    {
    printf("something is wrong with the file\n");
    return;
    };

  // we set up a linked list with references to all records
  for(I = 0; I < NumRecs; I++)                // filling linked list
    {
    Positions[I].PosIndex = I;                // record x is at position x
    Positions[I].Next     = I + 1;            // pointer to the next element
    };
  Positions[I - 1].Next   = -1;               // end of the linked list

  // we indirectly sort the data records; correct sequence will be in NumRecs
  BuckSortInt(DataRecs, Positions, 0, NumRecs, NoBuckets, NewPosis, &PosNr);

  // we copy the records in correct order to the array SortedRecs
  for(I = 0; I < NumRecs; I++)
    {
    memmove(&SortedRecs[I], &DataRecs[NewPosis[I]], RecLng);
    };

  // and we check for correct sorting
  LastInt = 0x80000000;
  for(I = 0; I < NumRecs; I++)
    {
    if(SortedRecs[I].TheInt < LastInt)
      {
      printf("sorting not correct\n");
      return;
      };
    LastInt = SortedRecs[I].TheInt;
    };

  printf("sorting correct\n");
  WriteText(OutTextT_2);
  }


void BuckSortInt(struct DataRec* Source, struct FiFo* InFif, int Start, 
                 int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr)
  {
  int I, K, Index, LastLast, FirstFree, Next;
  int StepSize, MinVal, MaxVal, ActVal;
  long long Interval;
  int  BuckBorder[MAXBUCKETS];                // border values between buckets
  struct FiAd* FifoAdmin;                     // the fifo administration
  struct FiFo* Fifo;                          // the left fifo

  if(NumBuckets > MAXBUCKETS) return;

  // we reserve space for the chained lists, depending on length of table
  FifoAdmin = (struct FiAd*) malloc(NumBuckets * sizeof(struct FiAd));
  Fifo      = (struct FiFo*) malloc(NumLines   * sizeof(struct FiFo));

  for(K = 0; K < NumBuckets; K++)
    {
    FifoAdmin[K].StartIndex = FifoAdmin[K].LastIndex  = -1;
    FifoAdmin[K].NumInFifo  = 0;
    };

  FirstFree = 0;                              // first free element

  // now we find the smallest and biggest element for the actual bucket
  MinVal = 0x7FFFFFFF;                        // min. value to biggest value
  MaxVal = 0x80000000;                        // max. value to smallest value
  Next   = Start;                             // this is the next element
  while(Next >= 0)                            // while bucket is not empty ...
    {
    Index = InFif[Next].PosIndex;             // next record of this bucket
    ActVal = Source[Index].TheInt;            // the key value of the record
    if(ActVal < MinVal) MinVal = ActVal;      // key smaller => swap value
    if(ActVal > MaxVal) MaxVal = ActVal;      // key bigger  => swap value
    Next = InFif[Next].Next;                  // ponter to next element
    };

  // only identical key values => the record numbers go directly to the result
  if(MinVal == MaxVal)                        // all keys identical ?
    {
    Next   = Start;                           // this is the next element
    while(Next >= 0)                          // while bucket is not empty ...
      {
      NewPosis[NewPosNr[0]] = InFif[Next].PosIndex;// final position of record
      NewPosNr[0]++;                          // we point to free element
      Next = InFif[Next].Next;                // next element of linked list
      };
    };

  // now we make the border-array ready
  Interval = (long long) MaxVal - (long long) MinVal; // we get the domain
  StepSize = Interval / NumBuckets;           // makes chunks of this size
  for(I = NumBuckets - 1; I > 0; I--)         // loop over the buckets
    {
    BuckBorder[I] = MaxVal - StepSize;        // lower border of the bucket
    MaxVal = MaxVal - StepSize;               // lower border for next bucket
    };
  BuckBorder[I] = MinVal;                     // this way is save

  //now we distribute the records of the input bucket to the new sub-buckets
  Next   = Start;                             // this is the next element
  while(Next >= 0)                            // while bucket is not empty ...
    {
    Index = InFif[Next].PosIndex;             // next record of this bucket
    ActVal = Source[Index].TheInt;            // key value of the record
    for(I = NumBuckets - 1; I >= 0; I--)
       {
       if(ActVal >= BuckBorder[I])            // pointing to corrrect bucket?
         {
         if(FifoAdmin[I].StartIndex < 0)      // already entries?
           {
           // this bucket is used the first time; we start the new linked list
           FifoAdmin[I].StartIndex  = FirstFree; // start and end
           FifoAdmin[I].LastIndex   = FirstFree; // are identical
           FifoAdmin[I].NumInFifo   = 1;      // one element in Fifo
           Fifo[FirstFree].PosIndex = Index;  // number of record to list
           Fifo[FirstFree].Next     = -1;     // no further elements in list
           FirstFree++;
           }
          else
           {
           // this bucket was already used; we add the reference
           LastLast                 = FifoAdmin[I].LastIndex; // last entry
           FifoAdmin[I].NumInFifo++;          // one element more in fifo
           FifoAdmin[I].LastIndex   = FirstFree; // new last element
           Fifo[LastLast].Next      = FirstFree; // linking ex-last element
           Fifo[FirstFree].PosIndex = Index;  // the record number
           Fifo[FirstFree].Next     = -1;     // the linked list ends here
           FirstFree++;
           };
         break;                               // record distributed
         };
       };
    Next = InFif[Next].Next;                  // next record of actual bucket
    };

  // we make the recursion and call this function for any subbucket
  for(I = 0; I < NumBuckets; I++)
    {
    if(FifoAdmin[I].NumInFifo > 1)
      BuckSortInt(Source, Fifo, FifoAdmin[I].StartIndex, 
                  FifoAdmin[I].NumInFifo, NumBuckets, NewPosis, NewPosNr);
     else
      if(FifoAdmin[I].NumInFifo == 1)
        {
        NewPosis[NewPosNr[0]] = Fifo[FifoAdmin[I].StartIndex].PosIndex;
        NewPosNr[0]++;
        }
    };

  // we must free the memory we got from the operating system
  free(Fifo);
  free(FifoAdmin);
  };


void WriteText(char * TextName)
  {
  FILE *fp;
  int I;
  char   TChar;
  short  TShint;
  int    TInt;
  float  TFloat;
  double TDouble;
  int    TPos;
  int    TLength;

  fp = fopen(TextName, "wb");
  if(fp != NULL)
    {
    for(I = 0; I < NumRecs; I++)
      {
      TChar   = SortedRecs[I].TheChar;
      TShint  = SortedRecs[I].TheShint;
      TInt    = SortedRecs[I].TheInt;
      TFloat  = SortedRecs[I].TheFloat;
      TDouble = SortedRecs[I].TheDouble;
      TPos    = SortedRecs[I].ThePos;
      TLength = SortedRecs[I].TheLength;
      fprintf(fp, "%6d  %6d  %12d  %14e  %14e  %6d   %4d  %s\n", TChar, 
      TShint, TInt, TFloat, TDouble, TPos, TLength, SortedRecs[I].TheKey);
      };
    fclose(fp);
    }
   else
    {
    printf("Ausgabedatei %s kann nicht angelegt werden\n", TextName);
    return;
    };
  };