WIP: MVP
This commit is contained in:
@ -6,22 +6,32 @@
|
||||
#include <queue>
|
||||
#include <iostream>
|
||||
|
||||
#define HEADER_SIZE 128
|
||||
|
||||
void initialize_table(const std::map<int, std::set<char> > &huffmanLengths,
|
||||
std::unordered_map<char, std::pair<int, short> > &table)
|
||||
void initialize_table(const int sumLen,
|
||||
const std::map<int, std::set<char> > &huffmanLengths,
|
||||
std::unordered_map<char, std::pair<int, short> > &codingTable,
|
||||
std::vector<char> &symbols,
|
||||
std::array<int, MAX_LEN> &counts)
|
||||
{
|
||||
int nextbl = 0;
|
||||
int nextbl = 0, offset = 0; // offset = total offset to symbols of current len
|
||||
short code = 0;
|
||||
|
||||
symbols.resize(sumLen);
|
||||
counts.fill(0);
|
||||
|
||||
// std::cerr << "Sum len " << sumLen << std::endl;
|
||||
|
||||
for (auto lenCodePairIt = huffmanLengths.begin(); lenCodePairIt != huffmanLengths.end(); lenCodePairIt++)
|
||||
{
|
||||
int cnt = 0; // counter of symbols of current code length
|
||||
auto lenCodePair = *lenCodePairIt;
|
||||
|
||||
counts[lenCodePair.first] = lenCodePair.second.size();
|
||||
// std::cerr << "Counts[" << lenCodePair.first << "] " << counts[lenCodePair.first] << std::endl;
|
||||
|
||||
for (auto it = lenCodePair.second.begin(); it != lenCodePair.second.end(); it++)
|
||||
{
|
||||
table[*it].first = lenCodePair.first; // save current bit length for code
|
||||
table[*it].second = code;
|
||||
codingTable[*it].first = lenCodePair.first; // save current bit length for code
|
||||
codingTable[*it].second = code;
|
||||
|
||||
// code := (code + 1) << ((bit length of the next symbol) − (current bit length))
|
||||
// code++;
|
||||
@ -33,15 +43,20 @@ void initialize_table(const std::map<int, std::set<char> > &huffmanLengths,
|
||||
nextbl = lenCodePair.first;
|
||||
}
|
||||
|
||||
// std::cerr << "symbols[" << offset + cnt << "] =" << (*it) << " code " << std::bitset<16>(code) << std::endl;
|
||||
symbols[offset + cnt] = *it;
|
||||
|
||||
code = (code + 1) << (nextbl - lenCodePair.first);
|
||||
cnt++;
|
||||
}
|
||||
|
||||
offset += cnt;
|
||||
// code <<= 1;
|
||||
}
|
||||
}
|
||||
|
||||
HuffmanTable::HuffmanTable(uint8_t *header) {
|
||||
int cnt1, cnt2;
|
||||
HuffmanTable::HuffmanTable(const char *header) {
|
||||
int cnt1, cnt2, total_cnt = 0;
|
||||
std::map<int, std::set<char> > huffmanLengths;
|
||||
|
||||
for (int i = 0; i < HEADER_SIZE; i++) {
|
||||
@ -49,13 +64,15 @@ HuffmanTable::HuffmanTable(uint8_t *header) {
|
||||
cnt2 = (header[i] & 0b1111);
|
||||
if (cnt1 != 0) huffmanLengths[cnt1].insert((char)(i * 2));
|
||||
if (cnt2 != 0) huffmanLengths[cnt2].insert((char)(i * 2 + 1));
|
||||
|
||||
total_cnt += cnt1 + cnt2;
|
||||
}
|
||||
|
||||
// build up codes
|
||||
initialize_table(huffmanLengths, this->huffmanCodes);
|
||||
initialize_table(total_cnt, huffmanLengths, this->huffmanCodes, this->symbols, this->counts);
|
||||
}
|
||||
|
||||
void get_lengths(Node* root, int len,
|
||||
void get_lengths(Node* root, int len, int &cnt,
|
||||
std::map<int, std::set<char> > &huffmanLengths)
|
||||
{
|
||||
if (!root)
|
||||
@ -65,11 +82,12 @@ void get_lengths(Node* root, int len,
|
||||
if (root->isLeaf()) {
|
||||
// huffmanCode[root->ch] = str;
|
||||
// std::cerr << "Got leaf: " << root->getChar() << std::endl;
|
||||
cnt++;
|
||||
huffmanLengths[len].insert(root->getChar());
|
||||
}
|
||||
|
||||
get_lengths(root->getLeft(), len + 1, huffmanLengths);
|
||||
get_lengths(root->getRight(), len + 1, huffmanLengths);
|
||||
get_lengths(root->getLeft(), len + 1, cnt, huffmanLengths);
|
||||
get_lengths(root->getRight(), len + 1, cnt, huffmanLengths);
|
||||
}
|
||||
|
||||
HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
|
||||
@ -81,7 +99,7 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
|
||||
freq[ch]++;
|
||||
}
|
||||
|
||||
std::cerr << "Calculated freqs" << std::endl;
|
||||
// std::cerr << "Calculated freqs" << std::endl;
|
||||
|
||||
// Create a priority queue to store live nodes of
|
||||
// Huffman tree;
|
||||
@ -94,7 +112,7 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
|
||||
pq.push(new_node);
|
||||
}
|
||||
|
||||
std::cerr << "Filled PQ: " << pq.size() << std::endl;
|
||||
// std::cerr << "Filled PQ: " << pq.size() << std::endl;
|
||||
|
||||
// do till there is more than one node in the queue
|
||||
while (pq.size() != 1)
|
||||
@ -119,11 +137,20 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
|
||||
Node* root = pq.top();
|
||||
|
||||
std::map<int, std::set<char> > huffmanLengths;
|
||||
get_lengths(root, 0, huffmanLengths);
|
||||
int total_cnt = 0;
|
||||
get_lengths(root, 0, total_cnt, huffmanLengths);
|
||||
|
||||
// std::cerr << "Got lengths: " << huffmanLengths.size() << std::endl;
|
||||
|
||||
initialize_table(huffmanLengths, this->huffmanCodes);
|
||||
initialize_table(total_cnt, huffmanLengths, this->huffmanCodes, this->symbols, this->counts);
|
||||
|
||||
// for (auto s : this->symbols) {
|
||||
// std::cerr << "Symbol " << s << std::endl;
|
||||
// }
|
||||
|
||||
// for (int i = 0; i < this->counts.size(); i++) {
|
||||
// std::cerr << "Count for len " << i << " " << this->counts[i] << std::endl;
|
||||
// }
|
||||
}
|
||||
|
||||
std::pair<int, short> HuffmanTable::operator[](const char &c) {
|
||||
@ -133,11 +160,45 @@ std::pair<int, short> HuffmanTable::operator[](const char &c) {
|
||||
/**
 * Emit the Huffman code assigned to `c` onto the output bit stream.
 *
 * @param os  bit stream that receives the code bits
 * @param c   symbol to encode
 * @throws std::runtime_error if `c` has no entry in the coding table
 */
void HuffmanTable::write_symbol(obitstream &os, const char &c) {
    // Single lookup; the entry holds (bit length, code bits).
    auto entryIt = huffmanCodes.find(c);
    if (entryIt == huffmanCodes.end()) {
        throw std::runtime_error("No code in table for char!");
    }

    auto &lengthAndCode = entryIt->second;

    // Trace output for the symbol being written (kept identical to the existing log line).
    std::cerr << "Write code for " << c << " " << (int)c << " : "
              << std::bitset<16>(lengthAndCode.second) << " " << " len "
              << lengthAndCode.first << std::endl;

    os.writebits(lengthAndCode.second, lengthAndCode.first);
}
|
||||
|
||||
uint8_t *HuffmanTable::to_header() {
|
||||
uint8_t *header = new uint8_t[HEADER_SIZE];
|
||||
/**
 * Decode one symbol from the bit stream using the canonical Huffman tables.
 *
 * Bits are read one at a time. After each bit the accumulated code is compared
 * against the range of codes of the current length
 * [first, first + counts[len]); on a hit the matching entry of `symbols`
 * is returned.
 *
 * @param bs  bit stream to read from
 * @return the decoded symbol converted to int, or -1 if no code matched within
 *         the supported code lengths
 *
 * NOTE(review): `symbols` holds `char`; on platforms where char is signed a
 * stored byte 0xFF also converts to -1 and is indistinguishable from the
 * failure value - confirm how callers tell the two apart.
 */
int HuffmanTable::decode_one_symbol(ibitstream &bs)
{
    // `counts` is indexed by code length, so only lengths that are valid
    // indices may be probed. The previous bound (len <= MAX_LEN) read
    // counts[MAX_LEN], one element past the end of the array.
    const int lengthLimit = static_cast<int>(this->counts.size());

    uint16_t code = 0; // bits accumulated so far
    int first = 0;     // first code value of the current length
    int index = 0;     // position in `symbols` of the first symbol of the current length

    for (int len = 1; len < lengthLimit; len++) {
        // read one bit and append it to the code
        uint16_t bit = (uint16_t) bs.getbits(1);
        code |= bit;

        const int count = this->counts[len];

        // Codes of this length occupy [first, first + count).
        if (code < first + count) {
            return this->symbols[index + (code - first)];
        }

        // No match: skip the symbols of this length and extend everything by one bit.
        index += count;
        first += count;
        first <<= 1;
        code <<= 1;
    }

    return -1; // ran out of code lengths without a match
}
|
||||
|
||||
char *HuffmanTable::to_header() {
|
||||
char *header = new char[HEADER_SIZE];
|
||||
|
||||
for (size_t i = 0; i < HEADER_SIZE; i++)
|
||||
{
|
||||
|
Reference in New Issue
Block a user