version 2.8
encode_rlefont.cc
1 #include "encode_rlefont.hh"
2 #include <algorithm>
3 #include <stdexcept>
4 #include "ccfixes.hh"
5 
6 // Number of reserved codes before the dictionary entries.
7 #define DICT_START 24
8 
9 // Special reference to mean "fill with zeros to the end of the glyph"
10 #define REF_FILLZEROS 16
11 
12 // RLE codes
13 #define RLE_CODEMASK 0xC0
14 #define RLE_VALMASK 0x3F
15 #define RLE_ZEROS 0x00 // 0 to 63 zeros
16 #define RLE_64ZEROS 0x40 // (1 to 64) * 64 zeros
17 #define RLE_ONES 0x80 // 1 to 64 full alphas
18 #define RLE_SHADE 0xC0 // 1 to 4 partial alphas
19 
20 // Dictionary "fill entries" for encoding bits directly.
21 #define DICT_START7BIT 4
22 #define DICT_START6BIT 132
23 #define DICT_START5BIT 196
24 #define DICT_START4BIT 228
25 #define DICT_START3BIT 244
26 #define DICT_START2BIT 252
27 
28 namespace mcufont {
29 namespace rlefont {
30 
31 // Get bit count for the "fill entries"
32 static size_t fillentry_bitcount(size_t index)
33 {
34  if (index >= DICT_START2BIT)
35  return 2;
36  else if (index >= DICT_START3BIT)
37  return 3;
38  else if (index >= DICT_START4BIT)
39  return 4;
40  else if (index >= DICT_START5BIT)
41  return 5;
42  else if (index >= DICT_START6BIT)
43  return 6;
44  else
45  return 7;
46 }
47 
48 // Count the number of equal pixels at the beginning of the pixelstring.
49 static size_t prefix_length(const DataFile::pixels_t &pixels, size_t pos)
50 {
51  uint8_t pixel = pixels.at(pos);
52  size_t count = 1;
53  while (pos + count < pixels.size() &&
54  pixels.at(pos + count) == pixel)
55  {
56  count++;
57  }
58  return count;
59 }
60 
61 // Perform the RLE encoding for a dictionary entry.
62 static encoded_font_t::rlestring_t encode_rle(const DataFile::pixels_t &pixels)
63 {
64  encoded_font_t::rlestring_t result;
65 
66  size_t pos = 0;
67  while (pos < pixels.size())
68  {
69  uint8_t pixel = pixels.at(pos);
70  size_t count = prefix_length(pixels, pos);
71  pos += count;
72 
73  if (pixel == 0)
74  {
75  // Up to 63 zeros can be encoded with RLE_ZEROS. If there are more,
76  // encode using RLE_64ZEROS, and then whatever remains with RLE_ZEROS.
77  while (count >= 64)
78  {
79  size_t c = (count > 4096) ? 64 : (count / 64);
80  result.push_back(RLE_64ZEROS | (c - 1));
81  count -= c * 64;
82  }
83 
84  if (count)
85  {
86  result.push_back(RLE_ZEROS | count);
87  }
88  }
89  else if (pixel == 15)
90  {
91  // Encode ones.
92  while (count)
93  {
94  size_t c = (count > 64) ? 64 : count;
95  result.push_back(RLE_ONES | (c - 1));
96  count -= c;
97  }
98  }
99  else
100  {
101  // Encode shades.
102  while (count)
103  {
104  size_t c = (count > 4) ? 4 : count;
105  result.push_back(RLE_SHADE | ((c - 1) << 4) | pixel);
106  count -= c;
107  }
108  }
109  }
110 
111  return result;
112 }
113 
114 // We use a tree structure to represent the dictionary entries.
115 // Using this tree, we can perform a combined Aho-Corasick string matching
116 // and breadth-first search to find the optimal encoding of glyph data.
117 class DictTreeNode
118 {
119 public:
120  constexpr DictTreeNode():
121  m_index(-1),
122  m_ref(false),
123  m_length(0),
124  m_child0(nullptr),
125  m_child15(nullptr),
126  m_suffix(nullptr)
127  {}
128 
129  void SetChild(uint8_t p, DictTreeNode *child)
130  {
131  if (p == 0)
132  m_child0 = child;
133  else if (p == 15)
134  m_child15 = child;
135  else if (p > 15)
136  throw std::logic_error("invalid pixel alpha: " + std::to_string(p));
137  else
138  {
139  if (!m_children)
140  {
141  m_children.reset(new DictTreeNode*[14]());
142  }
143  m_children[p - 1] = child;
144  }
145  }
146 
147  DictTreeNode* GetChild(uint8_t p) const
148  {
149  if (p == 0)
150  return m_child0;
151  else if (p == 15)
152  return m_child15;
153  else if (p > 15)
154  throw std::logic_error("invalid pixel alpha: " + std::to_string(p));
155  else if (!m_children)
156  return nullptr;
157  else
158  return m_children[p - 1];
159  }
160 
161  bool HasIntermediateChildren() const { return m_children != nullptr; }
162 
163  int GetIndex() const { return m_index; }
164  void SetIndex(int index) { m_index = index; }
165  bool GetRef() const { return m_ref; }
166  void SetRef(bool ref) { m_ref = ref; }
167  size_t GetLength() const { return m_length; }
168  void SetLength(size_t length) { m_length = length; }
169  DictTreeNode *GetSuffix() const { return m_suffix; }
170  void SetSuffix(DictTreeNode *suffix) { m_suffix = suffix; }
171 
172 private:
173  // Index of dictionary entry or -1 if just a intermediate node.
174  int m_index;
175 
176  // True for ref-encoded dictionary entries. Used to avoid recursion when
177  // encoding them.
178  bool m_ref;
179 
180  // Length of the corresponding dictionary entry replacement.
181  // Equals the distance from the tree root.
182  size_t m_length;
183 
184  // Most tree nodes will only ever contains children for 0 or 15.
185  // Therefore the array for other nodes is allocated only on demand.
186  DictTreeNode *m_child0;
187  DictTreeNode *m_child15;
188  std::unique_ptr<DictTreeNode*[]> m_children;
189 
190  // Pointer to the longest suffix of this entry that exists in the
191  // dictionary.
192  DictTreeNode *m_suffix;
193 };
194 
195 // Preallocated array for tree nodes
196 class TreeAllocator
197 {
198 public:
199  TreeAllocator(size_t count)
200  {
201  m_storage.reset(new DictTreeNode[count]);
202  m_next = m_storage.get();
203  m_left = count;
204  }
205 
206  DictTreeNode *allocate()
207  {
208  if (m_left == 0)
209  throw std::logic_error("Ran out of preallocated entries");
210 
211  m_left--;
212  return m_next++;
213  }
214 
215 private:
216  std::unique_ptr<DictTreeNode[]> m_storage;
217  DictTreeNode *m_next;
218  size_t m_left;
219 };
220 
221 // Add a new dictionary entry to the tree. Adds the intermediate nodes, but
222 // does not yet fill the suffix pointers.
223 static DictTreeNode* add_tree_entry(const DataFile::pixels_t &entry, int index,
224  bool ref_encoded, DictTreeNode *root,
225  TreeAllocator &storage)
226 {
227  DictTreeNode* node = root;
228  for (uint8_t p : entry)
229  {
230  DictTreeNode* branch = node->GetChild(p);
231  if (!branch)
232  {
233  branch = storage.allocate();
234  node->SetChild(p, branch);
235  }
236 
237  node = branch;
238  }
239 
240  // Replace the entry if it either does not yet have an encoding, or if
241  // the new entry is non-ref (i.e. can be used in more situations).
242  if (node->GetIndex() < 0 || (node->GetRef() && !ref_encoded))
243  {
244  node->SetIndex(index);
245  node->SetRef(ref_encoded);
246  node->SetLength(entry.size());
247  }
248 
249  return node;
250 }
251 
252 // Walk the tree and find if the entry exists in the tree. If it does,
253 // returns a pointer to it, otherwise nullptr.
254 static DictTreeNode *find_tree_node(DataFile::pixels_t::const_iterator begin,
255  DataFile::pixels_t::const_iterator end,
256  DictTreeNode *root)
257 {
258  DictTreeNode* node = root;
259  while (begin != end)
260  {
261  uint8_t pixel = *begin++;
262  node = node->GetChild(pixel);
263 
264  if (!node)
265  return nullptr;
266  }
267 
268  return node;
269 }
270 
271 // Fill in the suffix pointers recursively for the given subtree.
272 static void fill_tree_suffixes(DictTreeNode *root, DictTreeNode *subtree,
273  const DataFile::pixels_t &entry)
274 {
275  for (size_t i = 1; i < entry.size(); i++)
276  {
277  DictTreeNode *node = find_tree_node(entry.begin() + i, entry.end(), root);
278  if (node)
279  {
280  subtree->SetSuffix(node);
281  break;
282  }
283  }
284 
285  if (!subtree->GetSuffix())
286  subtree->SetSuffix(root);
287 
288  DataFile::pixels_t newentry(entry);
289  newentry.resize(entry.size() + 1);
290  for (uint8_t i = 0; i < 16; i++)
291  {
292  // Speed-up for the common case of 0 and 15 alphas.
293  if (i == 1 && !subtree->HasIntermediateChildren())
294  i += 14;
295 
296  DictTreeNode *child = subtree->GetChild(i);
297  if (child)
298  {
299  newentry.at(entry.size()) = i;
300  fill_tree_suffixes(root, child, newentry);
301  }
302  }
303 }
304 
305 // Construct a lookup tree from the dictionary entries.
306 static DictTreeNode* construct_tree(const std::vector<DataFile::dictentry_t> &dictionary,
307  TreeAllocator &storage, bool fast)
308 {
309  DictTreeNode* root = storage.allocate();
310 
311  // Populate the hardcoded entries for 0 to 15 alpha.
312  for (int j = 0; j < 16; j++)
313  {
314  DictTreeNode *node = storage.allocate();
315  node->SetIndex(j);
316  node->SetRef(false);
317  node->SetLength(1);
318  root->SetChild(j, node);
319  }
320 
321  // Populate the actual dictionary entries
322  size_t i = DICT_START;
323  for (DataFile::dictentry_t d : dictionary)
324  {
325  if (!d.replacement.size())
326  break;
327 
328  add_tree_entry(d.replacement, i, d.ref_encode, root, storage);
329  i++;
330  }
331 
332  if (!fast)
333  {
334  // Populate the fill entries for rest of dictionary
335  for (; i < 256; i++)
336  {
337  DataFile::pixels_t pixels;
338  size_t bitcount = fillentry_bitcount(i);
339  uint8_t byte = i - DICT_START7BIT;
340  for (size_t j = 0; j < bitcount; j++)
341  {
342  uint8_t p = (byte & (1 << j)) ? 15 : 0;
343  pixels.push_back(p);
344  }
345 
346  add_tree_entry(pixels, i, false, root, storage);
347  }
348 
349  // Fill in the suffix pointers for optimal encoding
350  DataFile::pixels_t nullentry;
351  fill_tree_suffixes(root, root, nullentry);
352  }
353 
354  return root;
355 }
356 
357 // Structure for keeping track of the shortest encoding to reach particular
358 // point of the pixel string.
359 struct encoding_link_t
360 {
361  // Index of the position prior to the last dictionary entry.
362  size_t previous;
363 
364  // Index of the dictionary entry that brings us to this point.
365  int index;
366 
367  // Number of links to get here from the start of the string.
368  size_t length;
369 
370  constexpr encoding_link_t(): previous(0), index(-1), length(9999999) {}
371 };
372 
373 // Perform the reference encoding for a glyph entry (optimal version).
374 // Uses a modified Aho-Corasick algorithm combined with breadth first search
375 // to find the shortest representation.
376 static encoded_font_t::refstring_t encode_ref_slow(const DataFile::pixels_t &pixels,
377  const DictTreeNode *root,
378  bool is_glyph)
379 {
380  // Chain of encodings. Each entry in this array corresponds to a position
381  // in the pixel string.
382  std::unique_ptr<encoding_link_t[]> chain(new encoding_link_t[pixels.size() + 1]);
383 
384  chain[0].previous = 0;
385  chain[0].index = 0;
386  chain[0].length = 0;
387 
388  // Read the pixels one-by-one and update the encoding links accordingly.
389  const DictTreeNode *node = root;
390  for (size_t pos = 0; pos < pixels.size(); pos++)
391  {
392  uint8_t pixel = pixels.at(pos);
393  const DictTreeNode *branch = node->GetChild(pixel);
394 
395  while (!branch)
396  {
397  // Cannot expand this sequence, defer to suffix.
398  node = node->GetSuffix();
399  branch = node->GetChild(pixel);
400  }
401 
402  node = branch;
403 
404  // We have arrived at a new node, add it and any proper suffixes to
405  // the link chain.
406  const DictTreeNode *suffix = node;
407  while (suffix != root)
408  {
409  if (suffix->GetIndex() >= 0 && (is_glyph || !suffix->GetRef()))
410  {
411  encoding_link_t link;
412  link.previous = pos + 1 - suffix->GetLength();
413  link.index = suffix->GetIndex();
414  link.length = chain[link.previous].length + 1;
415 
416  if (link.length < chain[pos + 1].length)
417  chain[pos + 1] = link;
418  }
419  suffix = suffix->GetSuffix();
420  }
421  }
422 
423  // Check if we can shorten the final encoding using REF_FILLZEROS.
424  if (is_glyph)
425  {
426  for (size_t pos = pixels.size() - 1; pos > 0; pos--)
427  {
428  if (pixels.at(pos) != 0)
429  break;
430 
431  encoding_link_t link;
432  link.previous = pos;
433  link.index = REF_FILLZEROS;
434  link.length = chain[pos].length + 1;
435 
436  if (link.length <= chain[pixels.size()].length)
437  chain[pixels.size()] = link;
438  }
439  }
440 
441  // Backtrack from the final link back to the start and construct the
442  // encoded string.
443  encoded_font_t::refstring_t result;
444  size_t len = chain[pixels.size()].length;
445  result.resize(len);
446 
447  size_t pos = pixels.size();
448  for (size_t i = len; i > 0; i--)
449  {
450  result.at(i - 1) = chain[pos].index;
451  pos = chain[pos].previous;
452  }
453 
454  return result;
455 }
456 
457 // Walk the tree as far as possible following the given pixel string iterator.
458 // Returns number of pixels encoded, and index is set to the dictionary reference.
459 static size_t walk_tree(const DictTreeNode *tree,
460  DataFile::pixels_t::const_iterator pixels,
461  DataFile::pixels_t::const_iterator pixelsend,
462  int &index, bool is_glyph)
463 {
464  size_t best_length = 0;
465  size_t length = 0;
466  index = -1;
467 
468  const DictTreeNode* node = tree;
469  while (pixels != pixelsend)
470  {
471  uint8_t pixel = *pixels++;
472  node = node->GetChild(pixel);
473 
474  if (!node)
475  break;
476 
477  length++;
478 
479  if (is_glyph || !node->GetRef())
480  {
481  if (node->GetIndex() >= 0)
482  {
483  index = node->GetIndex();
484  best_length = length;
485  }
486  }
487  }
488 
489  if (index < 0)
490  throw std::logic_error("walk_tree failed to find a valid encoding");
491 
492  return best_length;
493 }
494 
495 // Perform the reference encoding for a glyph entry (fast version).
496 // Uses a simple greedy search to find select the encodings.
497 static encoded_font_t::refstring_t encode_ref_fast(const DataFile::pixels_t &pixels,
498  const DictTreeNode *tree,
499  bool is_glyph)
500 {
501  encoded_font_t::refstring_t result;
502 
503  // Strip any zeroes from end
504  size_t end = pixels.size();
505 
506  if (is_glyph)
507  {
508  while (end > 0 && pixels.at(end - 1) == 0) end--;
509  }
510 
511  size_t i = 0;
512  while (i < end)
513  {
514  int index;
515  i += walk_tree(tree, pixels.begin() + i, pixels.end(), index, is_glyph);
516  result.push_back(index);
517  }
518 
519  if (i < pixels.size())
520  result.push_back(REF_FILLZEROS);
521 
522  return result;
523 }
524 
525 static encoded_font_t::refstring_t encode_ref(const DataFile::pixels_t &pixels,
526  const DictTreeNode *tree,
527  bool is_glyph, bool fast)
528 {
529  if (fast)
530  return encode_ref_fast(pixels, tree, is_glyph);
531  else
532  return encode_ref_slow(pixels, tree, is_glyph);
533 }
534 
535 // Compare dictionary entries by their coding type.
536 // Sorts RLE-encoded entries first and any empty entries last.
537 static bool cmp_dict_coding(const DataFile::dictentry_t &a,
538  const DataFile::dictentry_t &b)
539 {
540  if (a.replacement.size() == 0 && b.replacement.size() != 0)
541  return false;
542  else if (a.replacement.size() != 0 && b.replacement.size() == 0)
543  return true;
544  else if (a.ref_encode == false && b.ref_encode == true)
545  return true;
546  else
547  return false;
548 }
549 
550 size_t estimate_tree_node_count(const std::vector<DataFile::dictentry_t> &dict)
551 {
552  size_t count = DICT_START; // Preallocated entries
553  for (const DataFile::dictentry_t &d: dict)
554  {
555  count += d.replacement.size();
556  }
557  count += 128 * 7; // Fill entries
558  return count;
559 }
560 
561 std::unique_ptr<encoded_font_t> encode_font(const DataFile &datafile,
562  bool fast)
563 {
564  std::unique_ptr<encoded_font_t> result(new encoded_font_t);
565 
566  // Sort the dictionary so that RLE-coded entries come first.
567  // This way the two are easy to distinguish based on index.
568  std::vector<DataFile::dictentry_t> sorted_dict = datafile.GetDictionary();
569  std::stable_sort(sorted_dict.begin(), sorted_dict.end(), cmp_dict_coding);
570 
571  // Build the binary tree for looking up references.
572  size_t count = estimate_tree_node_count(sorted_dict);
573  TreeAllocator allocator(count);
574  DictTreeNode* tree = construct_tree(sorted_dict, allocator, fast);
575 
576  // Encode the dictionary entries, using either RLE or reference method.
577  for (const DataFile::dictentry_t &d : sorted_dict)
578  {
579  if (d.replacement.size() == 0)
580  {
581  continue;
582  }
583  else if (d.ref_encode)
584  {
585  result->ref_dictionary.push_back(encode_ref(d.replacement, tree, false, fast));
586  }
587  else
588  {
589  result->rle_dictionary.push_back(encode_rle(d.replacement));
590  }
591  }
592 
593  // Then reference-encode the glyphs
594  for (const DataFile::glyphentry_t &g : datafile.GetGlyphTable())
595  {
596  result->glyphs.push_back(encode_ref(g.data, tree, true, fast));
597  }
598 
599  // Optionally verify that the encoding was correct.
600  if (!fast)
601  {
602  for (size_t i = 0; i < datafile.GetGlyphCount(); i++)
603  {
604  std::unique_ptr<DataFile::pixels_t> decoded =
605  decode_glyph(*result, i, datafile.GetFontInfo());
606  if (*decoded != datafile.GetGlyphEntry(i).data)
607  {
608  auto iter = std::mismatch(decoded->begin(), decoded->end(),
609  datafile.GetGlyphEntry(i).data.begin());
610  size_t pos = iter.first - decoded->begin();
611  throw std::logic_error("verification of glyph " + std::to_string(i) +
612  " failed at position " + std::to_string(pos));
613  }
614  }
615  }
616 
617  return result;
618 }
619 
620 size_t get_encoded_size(const encoded_font_t &encoded)
621 {
622  size_t total = 0;
623  for (const encoded_font_t::rlestring_t &r : encoded.rle_dictionary)
624  {
625  total += r.size();
626 
627  if (r.size() != 0)
628  total += 2; // Offset table entry
629  }
630  for (const encoded_font_t::refstring_t &r : encoded.ref_dictionary)
631  {
632  total += r.size();
633 
634  if (r.size() != 0)
635  total += 2; // Offset table entry
636  }
637  for (const encoded_font_t::refstring_t &r : encoded.glyphs)
638  {
639  total += r.size();
640  total += 2; // Offset table entry
641  total += 1; // Width table entry
642  }
643  return total;
644 }
645 
646 std::unique_ptr<DataFile::pixels_t> decode_glyph(
647  const encoded_font_t &encoded,
648  const encoded_font_t::refstring_t &refstring,
649  const DataFile::fontinfo_t &fontinfo)
650 {
651  std::unique_ptr<DataFile::pixels_t> result(new DataFile::pixels_t);
652 
653  for (uint8_t ref : refstring)
654  {
655  if (ref <= 15)
656  {
657  result->push_back(ref);
658  }
659  else if (ref == REF_FILLZEROS)
660  {
661  result->resize(fontinfo.max_width * fontinfo.max_height, 0);
662  }
663  else if (ref < DICT_START)
664  {
665  throw std::logic_error("unknown code: " + std::to_string(ref));
666  }
667  else if (ref - DICT_START < (int)encoded.rle_dictionary.size())
668  {
669  for (uint8_t rle : encoded.rle_dictionary.at(ref - DICT_START))
670  {
671  if ((rle & RLE_CODEMASK) == RLE_ZEROS)
672  {
673  for (int i = 0; i < (rle & RLE_VALMASK); i++)
674  {
675  result->push_back(0);
676  }
677  }
678  else if ((rle & RLE_CODEMASK) == RLE_64ZEROS)
679  {
680  for (int i = 0; i < ((rle & RLE_VALMASK) + 1) * 64; i++)
681  {
682  result->push_back(0);
683  }
684  }
685  else if ((rle & RLE_CODEMASK) == RLE_ONES)
686  {
687  for (int i = 0; i < (rle & RLE_VALMASK) + 1; i++)
688  {
689  result->push_back(15);
690  }
691  }
692  else if ((rle & RLE_CODEMASK) == RLE_SHADE)
693  {
694  uint8_t count, alpha;
695  count = ((rle & RLE_VALMASK) >> 4) + 1;
696  alpha = ((rle & RLE_VALMASK) & 0xF);
697  for (int i = 0; i < count; i++)
698  {
699  result->push_back(alpha);
700  }
701  }
702  }
703  }
704  else if (ref - DICT_START - encoded.rle_dictionary.size() < encoded.ref_dictionary.size())
705  {
706  size_t index = ref - DICT_START - encoded.rle_dictionary.size();
707  std::unique_ptr<DataFile::pixels_t> part =
708  decode_glyph(encoded, encoded.ref_dictionary.at(index),
709  fontinfo);
710  result->insert(result->end(), part->begin(), part->end());
711  }
712  else
713  {
714  size_t bitcount = fillentry_bitcount(ref);
715 
716  uint8_t byte = ref - DICT_START7BIT;
717  for (size_t i = 0; i < bitcount; i++)
718  {
719  uint8_t p = (byte & (1 << i)) ? 15 : 0;
720  result->push_back(p);
721  }
722  }
723  }
724 
725  return result;
726 }
727 
728 std::unique_ptr<DataFile::pixels_t> decode_glyph(
729  const encoded_font_t &encoded, size_t index,
730  const DataFile::fontinfo_t &fontinfo)
731 {
732  return decode_glyph(encoded, encoded.glyphs.at(index), fontinfo);
733 }
734 
735 }}