This commit is contained in:
Andrey Gumirov
2024-01-11 02:44:53 +07:00
parent 8780822f95
commit b6a619303d
6 changed files with 201 additions and 57 deletions

View File

@ -6,22 +6,32 @@
#include <queue>
#include <iostream>
#define HEADER_SIZE 128
void initialize_table(const std::map<int, std::set<char> > &huffmanLengths,
std::unordered_map<char, std::pair<int, short> > &table)
void initialize_table(const int sumLen,
const std::map<int, std::set<char> > &huffmanLengths,
std::unordered_map<char, std::pair<int, short> > &codingTable,
std::vector<char> &symbols,
std::array<int, MAX_LEN> &counts)
{
int nextbl = 0;
int nextbl = 0, offset = 0; // offset = total offset to symbols of current len
short code = 0;
symbols.resize(sumLen);
counts.fill(0);
// std::cerr << "Sum len " << sumLen << std::endl;
for (auto lenCodePairIt = huffmanLengths.begin(); lenCodePairIt != huffmanLengths.end(); lenCodePairIt++)
{
int cnt = 0; // counter of symbols of current code length
auto lenCodePair = *lenCodePairIt;
counts[lenCodePair.first] = lenCodePair.second.size();
// std::cerr << "Counts[" << lenCodePair.first << "] " << counts[lenCodePair.first] << std::endl;
for (auto it = lenCodePair.second.begin(); it != lenCodePair.second.end(); it++)
{
table[*it].first = lenCodePair.first; // save current bit length for code
table[*it].second = code;
codingTable[*it].first = lenCodePair.first; // save current bit length for code
codingTable[*it].second = code;
// code := (code + 1) << ((bit length of the next symbol) - (current bit length))
// code++;
@ -33,15 +43,20 @@ void initialize_table(const std::map<int, std::set<char> > &huffmanLengths,
nextbl = lenCodePair.first;
}
// std::cerr << "symbols[" << offset + cnt << "] =" << (*it) << " code " << std::bitset<16>(code) << std::endl;
symbols[offset + cnt] = *it;
code = (code + 1) << (nextbl - lenCodePair.first);
cnt++;
}
offset += cnt;
// code <<= 1;
}
}
HuffmanTable::HuffmanTable(uint8_t *header) {
int cnt1, cnt2;
HuffmanTable::HuffmanTable(const char *header) {
int cnt1, cnt2, total_cnt = 0;
std::map<int, std::set<char> > huffmanLengths;
for (int i = 0; i < HEADER_SIZE; i++) {
@ -49,13 +64,15 @@ HuffmanTable::HuffmanTable(uint8_t *header) {
cnt2 = (header[i] & 0b1111);
if (cnt1 != 0) huffmanLengths[cnt1].insert((char)(i * 2));
if (cnt2 != 0) huffmanLengths[cnt2].insert((char)(i * 2 + 1));
total_cnt += cnt1 + cnt2;
}
// build up codes
initialize_table(huffmanLengths, this->huffmanCodes);
initialize_table(total_cnt, huffmanLengths, this->huffmanCodes, this->symbols, this->counts);
}
void get_lengths(Node* root, int len,
void get_lengths(Node* root, int len, int &cnt,
std::map<int, std::set<char> > &huffmanLengths)
{
if (!root)
@ -65,11 +82,12 @@ void get_lengths(Node* root, int len,
if (root->isLeaf()) {
// huffmanCode[root->ch] = str;
// std::cerr << "Got leaf: " << root->getChar() << std::endl;
cnt++;
huffmanLengths[len].insert(root->getChar());
}
get_lengths(root->getLeft(), len + 1, huffmanLengths);
get_lengths(root->getRight(), len + 1, huffmanLengths);
get_lengths(root->getLeft(), len + 1, cnt, huffmanLengths);
get_lengths(root->getRight(), len + 1, cnt, huffmanLengths);
}
HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
@ -81,7 +99,7 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
freq[ch]++;
}
std::cerr << "Calculated freqs" << std::endl;
// std::cerr << "Calculated freqs" << std::endl;
// Create a priority queue to store live nodes of
// Huffman tree;
@ -94,7 +112,7 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
pq.push(new_node);
}
std::cerr << "Filled PQ: " << pq.size() << std::endl;
// std::cerr << "Filled PQ: " << pq.size() << std::endl;
// do till there is more than one node in the queue
while (pq.size() != 1)
@ -119,11 +137,20 @@ HuffmanTable::HuffmanTable(std::basic_istream<char> &is) {
Node* root = pq.top();
std::map<int, std::set<char> > huffmanLengths;
get_lengths(root, 0, huffmanLengths);
int total_cnt = 0;
get_lengths(root, 0, total_cnt, huffmanLengths);
// std::cerr << "Got lengths: " << huffmanLengths.size() << std::endl;
initialize_table(huffmanLengths, this->huffmanCodes);
initialize_table(total_cnt, huffmanLengths, this->huffmanCodes, this->symbols, this->counts);
// for (auto s : this->symbols) {
// std::cerr << "Symbol " << s << std::endl;
// }
// for (int i = 0; i < this->counts.size(); i++) {
// std::cerr << "Count for len " << i << " " << this->counts[i] << std::endl;
// }
}
std::pair<int, short> HuffmanTable::operator[](const char &c) {
@ -133,11 +160,45 @@ std::pair<int, short> HuffmanTable::operator[](const char &c) {
/**
 * Write the code assigned to character `c` to the output bit stream.
 *
 * The table entry for `c` is a {bit length, code} pair; the code is handed to
 * `os.writebits(code, length)` and a trace line is printed to std::cerr.
 *
 * @param os output bit stream that receives the code.
 * @param c  character to encode.
 * @throws std::runtime_error if `c` has no entry in `huffmanCodes`.
 */
void HuffmanTable::write_symbol(obitstream &os, const char &c) {
    // Look the character up once and reuse the iterator instead of paying for
    // a find() followed by several operator[] lookups on the same key.
    const auto entry = huffmanCodes.find(c);
    if (entry == huffmanCodes.end()) throw std::runtime_error("No code in table for char!");

    auto &lenAndCode = entry->second; // first = bit length, second = code
    std::cerr << "Write code for " << c << " " << static_cast<int>(c) << " : " << std::bitset<16>(lenAndCode.second) << " " << " len " << lenAndCode.first << std::endl;
    os.writebits(lenAndCode.second, lenAndCode.first);
}
uint8_t *HuffmanTable::to_header() {
uint8_t *header = new uint8_t[HEADER_SIZE];
/**
 * Decode one symbol from the bit stream using the per-length tables
 * `counts` (number of codes of each bit length) and `symbols` (symbols
 * ordered by code length).
 *
 * Bits are consumed one at a time. After each bit the accumulated code is
 * compared against the range of codes of the current length
 * [first, first + count); on a hit the matching entry of `symbols` is returned.
 *
 * @param bs input bit stream; one bit is read per iteration via getbits(1).
 * @return the matched `symbols` entry converted to int, or -1 when no code of
 *         any representable length matched.
 *
 * NOTE(review): `symbols` holds `char`; where `char` is signed, a stored byte
 * 0xFF converts to -1 and is indistinguishable from the failure value.
 * Confirm how callers tell the two apart.
 */
int HuffmanTable::decode_one_symbol(ibitstream &bs)
{
    uint16_t code = 0; // bits accumulated so far
    int first = 0;     // first code of the current length
    int index = 0;     // position in symbols of the first symbol of the current length

    // counts has MAX_LEN entries, so the largest valid index (code length) is
    // MAX_LEN - 1. The previous bound `len <= MAX_LEN` read counts[MAX_LEN],
    // one element past the end of the array.
    for (int len = 1; len < MAX_LEN; ++len) {
        // read one bit and append it to the code
        const uint16_t bit = static_cast<uint16_t>(bs.getbits(1));
        code |= bit;

        const int count = this->counts[len];
        // std::cerr << "Read bit " << bit << " code " << std::bitset<16>(code) << " len " << len <<
        // " first " << std::bitset<16>(first) << " index " << index << " count " << count << std::endl;
        if (code < first + count) {
            // code lies inside this length's range: pick the symbol at the same offset
            return this->symbols[index + (code - first)];
        }

        // No match at this length: skip its symbols and move both the range
        // start and the accumulated code on to the next bit length.
        index += count;
        first += count;
        first <<= 1;
        code <<= 1;
    }
    return -1;
}
char *HuffmanTable::to_header() {
char *header = new char[HEADER_SIZE];
for (size_t i = 0; i < HEADER_SIZE; i++)
{