Skip to content

Commit b8b456d

Browse files
Optimize endpoint reordering algorithm
This change improves the compression speed for both DXT and ETC encodings. Explanation: The main ideas used for optimization of the endpoint reordering algorithm: - On each iteration, the list of the chosen endpoints is updated only from one side, so all the computations performed for the unchanged side of the list can be cached and reused on the next iteration. - The list of the chosen endpoints can be built using an array of double size, growing from the middle of the array. This eliminates unnecessary memory reallocations and movements. - When an element is removed from the list of remaining endpoints, instead of moving all the elements with higher indices, just a single last element of the list can be moved into the position of the removed element (the original indices of the remaining endpoints should be stored within the list elements to maintain proper indexing). DXT Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (revision ea9b8d8). [Compressing Kodak set without mipmaps using DXT1 encoding] Original: 1582222 bytes / 28.848 sec Modified: 1468204 bytes / 5.875 sec Improvement: 7.21% (compression ratio) / 79.63% (compression time) [Compressing Kodak set with mipmaps using DXT1 encoding] Original: 2065243 bytes / 36.952 sec Modified: 1914805 bytes / 7.834 sec Improvement: 7.28% (compression ratio) / 78.80% (compression time) ETC Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz).
The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings). [Compressing Kodak set without mipmaps using ETC1 encoding] Total size: 1607858 bytes Total time: 13.261 sec Average bitrate: 1.363 bpp Average Luma PSNR: 34.050 dB
1 parent 7143913 commit b8b456d

File tree

2 files changed

+45
-36
lines changed

2 files changed

+45
-36
lines changed

bin/crunch_x64.exe

512 Bytes
Binary file not shown.

crnlib/crn_comp.cpp

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -615,55 +615,64 @@ static void sort_color_endpoints(crnlib::vector<uint16>& remapping, const optimi
615615
}
616616

617617
static void remap_color_endpoints(uint16* remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) {
618-
const uint* frequency = hist + selected * n;
619-
crnlib::vector<uint16> chosen, remaining;
620-
crnlib::vector<uint> total_frequency(n);
621-
chosen.push_back(selected);
618+
struct Node {
619+
uint index, frequency, front_similarity, back_similarity;
620+
optimize_color_params::unpacked_endpoint e;
621+
Node() { utils::zero_object(*this); }
622+
};
623+
crnlib::vector<Node> remaining(n);
622624
for (uint16 i = 0; i < n; i++) {
623-
if (i != selected) {
624-
remaining.push_back(i);
625-
total_frequency[i] = frequency[i];
626-
}
625+
remaining[i].index = i;
626+
remaining[i].e = unpacked_endpoints[i];
627627
}
628-
for (uint similarity_base = (uint)(4000 * (1.0f + weight)), total_frequency_normalizer = 0; remaining.size();) {
629-
const optimize_color_params::unpacked_endpoint& e_front = unpacked_endpoints[chosen.front()];
630-
const optimize_color_params::unpacked_endpoint& e_back = unpacked_endpoints[chosen.back()];
631-
uint16 selected_index = 0;
632-
uint64 best_value = 0, selected_similarity_front = 0, selected_similarity_back = 0;
633-
for (uint16 i = 0; i < remaining.size(); i++) {
634-
uint remaining_index = remaining[i];
635-
const optimize_color_params::unpacked_endpoint& e_remaining = unpacked_endpoints[remaining_index];
636-
uint error_front = color::elucidian_distance(e_remaining.low, e_front.low, false) + color::elucidian_distance(e_remaining.high, e_front.high, false);
637-
uint error_back = color::elucidian_distance(e_remaining.low, e_back.low, false) + color::elucidian_distance(e_remaining.high, e_back.high, false);
638-
uint64 similarity_front = similarity_base - math::minimum<uint>(error_front, 4000);
639-
uint64 similarity_back = similarity_base - math::minimum<uint>(error_back, 4000);
640-
uint64 value = math::maximum(similarity_front, similarity_back) * (total_frequency[remaining_index] + (total_frequency_normalizer << 3)) + 1;
641-
if (value > best_value) {
628+
crnlib::vector<uint16> chosen(n << 1);
629+
uint remaining_count = n, chosen_front = n, chosen_back = chosen_front;
630+
chosen[chosen_front] = selected;
631+
optimize_color_params::unpacked_endpoint front_e = remaining[selected].e, back_e = front_e;
632+
bool front_updated = true, back_updated = true;
633+
remaining[selected] = remaining[--remaining_count];
634+
const uint* frequency = hist + selected * n;
635+
636+
for (uint similarity_base = (uint)(4000 * (1.0f + weight)), frequency_normalizer = 0; remaining_count;) {
637+
uint64 best_value = 0;
638+
uint best_index = 0;
639+
for (uint i = 0; i < remaining_count; i++) {
640+
Node& node = remaining[i];
641+
node.frequency += frequency[node.index];
642+
if (front_updated)
643+
node.front_similarity = similarity_base - math::minimum<uint>(4000, color::elucidian_distance(node.e.low, front_e.low, false) + color::elucidian_distance(node.e.high, front_e.high, false));
644+
if (back_updated)
645+
node.back_similarity = similarity_base - math::minimum<uint>(4000, color::elucidian_distance(node.e.low, back_e.low, false) + color::elucidian_distance(node.e.high, back_e.high, false));
646+
uint64 value = math::maximum(node.front_similarity, node.back_similarity) * (node.frequency + frequency_normalizer) + 1;
647+
if (value > best_value || (value == best_value && node.index < selected)) {
642648
best_value = value;
643-
selected_index = i;
644-
selected_similarity_front = similarity_front;
645-
selected_similarity_back = similarity_back;
649+
best_index = i;
650+
selected = node.index;
646651
}
647652
}
648-
selected = remaining[selected_index];
649653
frequency = hist + selected * n;
650-
total_frequency_normalizer = total_frequency[selected];
651654
uint frequency_front = 0, frequency_back = 0;
652-
for (int front = 0, back = chosen.size() - 1, scale = back; scale > 0; front++, back--, scale -= 2) {
655+
for (int front = chosen_front, back = chosen_back, scale = back - front; scale > 0; front++, back--, scale -= 2) {
653656
frequency_front += scale * frequency[chosen[front]];
654657
frequency_back += scale * frequency[chosen[back]];
655658
}
656-
if (selected_similarity_front * frequency_front > selected_similarity_back * frequency_back) {
657-
chosen.push_front(selected);
659+
front_updated = back_updated = false;
660+
Node& best_node = remaining[best_index];
661+
frequency_normalizer = best_node.frequency << 3;
662+
if ((uint64)best_node.front_similarity * frequency_front > (uint64)best_node.back_similarity * frequency_back) {
663+
chosen[--chosen_front] = selected;
664+
front_e = best_node.e;
665+
front_updated = true;
658666
} else {
659-
chosen.push_back(selected);
667+
chosen[++chosen_back] = selected;
668+
back_e = best_node.e;
669+
back_updated = true;
660670
}
661-
remaining.erase(remaining.begin() + selected_index);
662-
for (uint16 i = 0; i < remaining.size(); i++)
663-
total_frequency[remaining[i]] += frequency[remaining[i]];
671+
best_node = remaining[--remaining_count];
664672
}
665-
for (uint16 i = 0; i < n; i++)
666-
remapping[chosen[i]] = i;
673+
674+
for (uint16 i = chosen_front; i <= chosen_back; i++)
675+
remapping[chosen[i]] = i - chosen_front;
667676
}
668677

669678
void crn_comp::optimize_color_endpoints_task(uint64 data, void* pData_ptr) {

0 commit comments

Comments
 (0)