/* * Assignment 2 TK6434 Data Structures & Algorithm Sem II 2008/2009 * Compression - Huffman code * * Refs: * - http://www.csgnetwork.com/asciiset.html * - http://www.cs.duke.edu/csed/poop/huff/info/ * - http://en.wikipedia.org/wiki/Huffman_coding * - http://cslibrary.stanford.edu/110/BinaryTrees.html */ #include #include #include #include #include #include // for swap #include using namespace std; struct node { int frequency; char data; int visited; char vertex; string path; struct node *left; struct node *right; }; struct queue { node *pointer; queue *next; }; queue *HEAD = NULL, *TAIL = NULL; void scanList(char, char[], int, int&, bool&); void selectionSort(char[], int[], int); void selectionSort(node *[], int); void displayNodes(char[], int[], int[], int); void trimArray(char[],int[], char[], int[], int[], int&); void getCharFrequency(string, char[], int[], char[], int[], int[], int&); node *newNode(string, int, int); int hasChildren(node *); void forestOfOneNodes(char[],int[], int, node *[]); void buildTree(node*, char[], int[], int); void traverseTree(node*, int); void enque(node*, string); node *deque(); void breadthFirstSearch(node*, string[], char[], int); void writeFile(string, string[], char[], int); void scanList(char ch, char charList[], int length, int &i, bool &found){ i=0; while(i < length && ch != charList[i]) { i++; } found = (i < length); } void selectionSort(node *p[], int length){ for (int nStartIndex = 0; nStartIndex < length; nStartIndex++) { // nSmallestIndex is the index of the smallest element we've encountered so far. int nSmallestIndex = nStartIndex; // Search through every element starting at nStartIndex+1 for (int nCurrentIndex = nStartIndex + 1; nCurrentIndex < length; nCurrentIndex++) { // If the current element is smaller than our previously found smallest if (p[nCurrentIndex]->frequency < p[nSmallestIndex]->frequency) // Store the index in nSmallestIndex nSmallestIndex = nCurrentIndex; } // Swap our start element with our smallest element swap(p[nStartIndex], p[nSmallestIndex]); } } void displayNodes(char charList[], int freqList[], int bitsArr[], int length){ int g; for(g=0; gdata << ":" << p[g]->frequency << ", "; cout << endl; } void trimArray(char toTrimChar[],int toTrimFreq[], char trimmedChar[], int trimmedFreq[], int bitsBefore[], int &length){ int m = 0; for (int h=0; hfrequency = f; node->data = data; node->visited = visited; node->vertex = '0'; node->path = ""; node->left = NULL; node->right = NULL; return(node); } void forestOfOneNodes(char charList[],int freqList[], int length, node *p[]){ for (int i=0; i 1) { selectionSort(tree,length); displayNodes(tree,length); char newName = NULL; int newValue = tree[0]->frequency + tree[1]->frequency; n = newNode(newName,newValue,0); n->left = tree[0]; n->left->vertex = '0'; n->right = tree[1]; n->right->vertex = '1'; tree[1] = n; for (i=1; ileft,1); cout << root->data << ":" << root->frequency << ", " ; traverseTree(root->right,1); break; case 2: //preorder cout << root->data << ":" << root->frequency << ", " ; traverseTree(root->left,2); traverseTree(root->right,2); break; break; case 3: //postorder traverseTree(root->left,3); traverseTree(root->right,3); cout << root->data << ":" << root->frequency << ", " ; break; } } void enque(node *toqueue, string path){ string t(1,toqueue->vertex); // cast char to string toqueue->path = path.append(t); // update path queue *temp = new struct queue; //cout << "put in q: " << toqueue->frequency; temp->pointer = toqueue; temp->next = NULL; if(HEAD == NULL) HEAD = temp; else TAIL->next = temp; TAIL = temp; //cout << " - HEAD:" << HEAD->pointer->frequency << " TAIL: " << TAIL->pointer->frequency << endl; } node *deque(){ queue* temp; temp = HEAD; HEAD = HEAD->next; return temp->pointer; } void breadthFirstSearch(node *root, string binary[], char charList[], int length){ /* * 1. Enqueue the root node. * 2. Dequeue a node and examine it. * a) If the element sought is found in this node, quit the search and return a result. * b) Otherwise enqueue any successors (the direct child nodes) that have not yet been examined. * 3. If the queue is empty, every node on the graph has been examined – quit the search and return "not found". * 4. Repeat from Step 2. Note: Using a stack instead of a queue would turn this algorithm into a depth-first search. */ cout << endl << "Building optimal table...." << endl; enque(root,root->path); node *m; bool found; int index; int bits = 0, total = 0; while (HEAD!=NULL && TAIL!=NULL){ m = deque(); // Deque a node if (!m->visited){ m->visited = 1; // Mark the node as visited if (m->left == NULL && m->right == NULL) { bits = m->path.length() * m->frequency; total = total + bits; scanList(m->data,charList,length,index,found); if (found) binary[index] = m->path; cout << m->data << ":" << m->frequency << ", bits: " << bits ; cout << ", path: " << m->path << endl; } } if (m->left != NULL) enque(m->left,m->path); //Enque all its adjacent nodes if (m->right != NULL) enque(m->right,m->path); } cout << "Total bits after compression: " << total << endl; } void writeFile(string myfile, string binary[], char charList[], int length){ string lines; int index; bool found; char ch; ofstream outfile ("compressed.txt"); cout << endl << "Writing to file compressed.txt..." << endl; ifstream in( myfile.c_str() ); if (in.is_open()){ in.get(ch); // xtracts a character from the stream and stores it in ch while (! in.eof() ){ if (ch == '\n') ch = '^'; // newline else if (ch == '\x20') ch = '_'; // space scanList(ch,charList,length,index,found); if (found) lines.append(binary[index]); in.get(ch); } in.close(); } cout << lines; if (outfile.is_open()) { outfile << lines; outfile.close(); cout << endl; } else cout << "Unable to open file for writing"; } int main() { char charListAllowed[] = "abcdedefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@$()[]%&'.,_^"; // allowed characters int freqListAllowed[sizeof(charListAllowed)]; int length = sizeof(charListAllowed); // num of character to count char charList[length]; int freqList[length]; // frequency counts int bitsBefore[length]; node* tree[length]; string binary[length]; string myfile; cout << "==============================================" << endl; cout << "\tHuffman Code for Compression" << endl; cout << "==============================================" << endl; cout << endl << "Please enter the filename u wish to process: "; getline (cin,myfile); getCharFrequency(myfile,charListAllowed,freqListAllowed,charList,freqList,bitsBefore,length); buildTree(tree,charList,freqList,length); cout << endl << "Traversing tree inorder..." << endl; traverseTree(tree[0],1); cout << endl << "Traversing tree preorder..." << endl; traverseTree(tree[0],2); cout << endl; breadthFirstSearch(tree[0],binary,charList,length); writeFile(myfile,binary,charList,length); }