version 2.8
optimize_rlefont.cc
1 #include "optimize_rlefont.hh"
2 #include "encode_rlefont.hh"
3 #include <random>
4 #include <iostream>
5 #include <set>
6 #include <thread>
7 #include <algorithm>
8 #include "ccfixes.hh"
9 
10 namespace mcufont {
11 namespace rlefont {
12 
13 typedef std::mt19937 rnd_t;
14 
15 // Select a random substring among all the glyphs in the datafile.
16 std::unique_ptr<DataFile::pixels_t> random_substring(const DataFile &datafile, rnd_t &rnd)
17 {
18  std::uniform_int_distribution<size_t> dist1(0, datafile.GetGlyphCount() - 1);
19  size_t index = dist1(rnd);
20 
21  const DataFile::pixels_t &pixels = datafile.GetGlyphEntry(index).data;
22 
23  std::uniform_int_distribution<size_t> dist2(2, pixels.size());
24  size_t length = dist2(rnd);
25 
26  std::uniform_int_distribution<size_t> dist3(0, pixels.size() - length);
27  size_t start = dist3(rnd);
28 
29  std::unique_ptr<DataFile::pixels_t> result;
30  result.reset(new DataFile::pixels_t(pixels.begin() + start,
31  pixels.begin() + start + length));
32  return result;
33 }
34 
35 // Try to replace the worst dictionary entry with a better one.
36 void optimize_worst(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
37 {
38  std::uniform_int_distribution<size_t> dist(0, 1);
39 
40  DataFile trial = datafile;
41  size_t worst = trial.GetLowScoreIndex();
42  DataFile::dictentry_t d = trial.GetDictionaryEntry(worst);
43  d.replacement = *random_substring(datafile, rnd);
44  d.ref_encode = dist(rnd);
45  trial.SetDictionaryEntry(worst, d);
46 
47  size_t newsize = get_encoded_size(trial);
48 
49  if (newsize < size)
50  {
51  d.score = size - newsize;
52  datafile.SetDictionaryEntry(worst, d);
53  size = newsize;
54 
55  if (verbose)
56  std::cout << "optimize_worst: replaced " << worst
57  << " score " << d.score << std::endl;
58  }
59 }
60 
61 // Try to replace random dictionary entry with another one.
62 void optimize_any(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
63 {
64  DataFile trial = datafile;
65  std::uniform_int_distribution<size_t> dist(0, DataFile::dictionarysize - 1);
66  size_t index = dist(rnd);
67  DataFile::dictentry_t d = trial.GetDictionaryEntry(index);
68  d.replacement = *random_substring(datafile, rnd);
69  trial.SetDictionaryEntry(index, d);
70 
71  size_t newsize = get_encoded_size(trial);
72 
73  if (newsize < size)
74  {
75  d.score = size - newsize;
76  datafile.SetDictionaryEntry(index, d);
77  size = newsize;
78 
79  if (verbose)
80  std::cout << "optimize_any: replaced " << index
81  << " score " << d.score << std::endl;
82  }
83 }
84 
85 // Try to append or prepend random dictionary entry.
86 void optimize_expand(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose, bool binary_only)
87 {
88  DataFile trial = datafile;
89  std::uniform_int_distribution<size_t> dist1(0, DataFile::dictionarysize - 1);
90  size_t index = dist1(rnd);
91  DataFile::dictentry_t d = trial.GetDictionaryEntry(index);
92 
93  std::uniform_int_distribution<size_t> dist3(1, 3);
94  size_t count = dist3(rnd);
95 
96  for (size_t i = 0; i < count; i++)
97  {
98  std::uniform_int_distribution<size_t> booldist(0, 1);
99  std::uniform_int_distribution<size_t> pixeldist(0, 15);
100  uint8_t pixel;
101 
102  if (binary_only)
103  {
104  pixel = booldist(rnd) ? 15 : 0;
105  }
106  else
107  {
108  pixel = pixeldist(rnd);
109  }
110 
111  bool prepend = booldist(rnd);
112 
113  if (prepend)
114  {
115  d.replacement.insert(d.replacement.begin(), pixel);
116  }
117  else
118  {
119  d.replacement.push_back(pixel);
120  }
121  }
122 
123  trial.SetDictionaryEntry(index, d);
124 
125  size_t newsize = get_encoded_size(trial);
126 
127  if (newsize < size)
128  {
129  d.score = size - newsize;
130  datafile.SetDictionaryEntry(index, d);
131  size = newsize;
132 
133  if (verbose)
134  std::cout << "optimize_expand: expanded " << index
135  << " by " << count << " pixels, score " << d.score << std::endl;
136  }
137 }
138 
139 // Try to trim random dictionary entry.
140 void optimize_trim(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
141 {
142  DataFile trial = datafile;
143  std::uniform_int_distribution<size_t> dist1(0, DataFile::dictionarysize - 1);
144  size_t index = dist1(rnd);
145  DataFile::dictentry_t d = trial.GetDictionaryEntry(index);
146 
147  if (d.replacement.size() <= 2) return;
148 
149  std::uniform_int_distribution<size_t> dist2(0, std::min((int)d.replacement.size() / 2, 5));
150  size_t start = dist2(rnd);
151  size_t end = dist2(rnd);
152 
153  if (start)
154  {
155  d.replacement.erase(d.replacement.begin(), d.replacement.begin() + start);
156  }
157 
158  if (end)
159  {
160  d.replacement.erase(d.replacement.end() - end, d.replacement.end() - 1);
161  }
162 
163  trial.SetDictionaryEntry(index, d);
164 
165  size_t newsize = get_encoded_size(trial);
166 
167  if (newsize < size)
168  {
169  d.score = size - newsize;
170  datafile.SetDictionaryEntry(index, d);
171  size = newsize;
172 
173  if (verbose)
174  std::cout << "optimize_trim: trimmed " << index
175  << " by " << start << " pixels from start and "
176  << end << " pixels from end, score " << d.score << std::endl;
177  }
178 }
179 
180 // Switch random dictionary entry to use ref encoding or back to rle.
181 void optimize_refdict(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
182 {
183  DataFile trial = datafile;
184  std::uniform_int_distribution<size_t> dist1(0, DataFile::dictionarysize - 1);
185  size_t index = dist1(rnd);
186  DataFile::dictentry_t d = trial.GetDictionaryEntry(index);
187 
188  d.ref_encode = !d.ref_encode;
189 
190  trial.SetDictionaryEntry(index, d);
191 
192  size_t newsize = get_encoded_size(trial);
193 
194  if (newsize < size)
195  {
196  d.score = size - newsize;
197  datafile.SetDictionaryEntry(index, d);
198  size = newsize;
199 
200  if (verbose)
201  std::cout << "optimize_refdict: switched " << index
202  << " to " << (d.ref_encode ? "ref" : "RLE")
203  << ", score " << d.score << std::endl;
204  }
205 }
206 
207 // Combine two random dictionary entries.
208 void optimize_combine(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
209 {
210  DataFile trial = datafile;
211  std::uniform_int_distribution<size_t> dist1(0, DataFile::dictionarysize - 1);
212  size_t worst = datafile.GetLowScoreIndex();
213  size_t index1 = dist1(rnd);
214  size_t index2 = dist1(rnd);
215 
216  const DataFile::pixels_t &part1 = datafile.GetDictionaryEntry(index1).replacement;
217  const DataFile::pixels_t &part2 = datafile.GetDictionaryEntry(index2).replacement;
218 
219  DataFile::dictentry_t d;
220  d.replacement = part1;
221  d.replacement.insert(d.replacement.end(), part2.begin(), part2.end());
222  d.ref_encode = true;
223  trial.SetDictionaryEntry(worst, d);
224 
225  size_t newsize = get_encoded_size(trial);
226 
227  if (newsize < size)
228  {
229  d.score = size - newsize;
230  datafile.SetDictionaryEntry(worst, d);
231  size = newsize;
232 
233  if (verbose)
234  std::cout << "optimize_combine: combined " << index1
235  << " and " << index2 << " to replace " << worst
236  << ", score " << d.score << std::endl;
237  }
238 }
239 
240 // Pick a random part of an encoded glyph and encode it as a ref dict.
241 void optimize_encpart(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
242 {
243  std::unique_ptr<encoded_font_t> e = encode_font(datafile);
244 
245  // Pick a random encoded glyph
246  std::uniform_int_distribution<size_t> dist1(0, datafile.GetGlyphCount() - 1);
247  size_t index = dist1(rnd);
248  const encoded_font_t::refstring_t &refstr = e->glyphs.at(index);
249 
250  if (refstr.size() < 2)
251  return;
252 
253  // Pick a random part of it
254  std::uniform_int_distribution<size_t> dist2(2, refstr.size());
255  size_t length = dist2(rnd);
256  std::uniform_int_distribution<size_t> dist3(0, refstr.size() - length);
257  size_t start = dist3(rnd);
258 
259  // Decode that part
260  encoded_font_t::refstring_t substr(refstr.begin() + start,
261  refstr.begin() + start + length);
262  std::unique_ptr<DataFile::pixels_t> decoded =
263  decode_glyph(*e, substr, datafile.GetFontInfo());
264 
265  // Add that as a new dictionary entry
266  DataFile trial = datafile;
267  size_t worst = trial.GetLowScoreIndex();
268  DataFile::dictentry_t d = trial.GetDictionaryEntry(worst);
269  d.replacement = *decoded;
270  d.ref_encode = true;
271  trial.SetDictionaryEntry(worst, d);
272 
273  size_t newsize = get_encoded_size(trial);
274 
275  if (newsize < size)
276  {
277  d.score = size - newsize;
278  datafile.SetDictionaryEntry(worst, d);
279  size = newsize;
280 
281  if (verbose)
282  std::cout << "optimize_encpart: replaced " << worst
283  << " score " << d.score << std::endl;
284  }
285 }
286 
287 // Execute all the optimization algorithms once.
288 void optimize_pass(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose)
289 {
290  optimize_worst(datafile, size, rnd, verbose);
291  optimize_any(datafile, size, rnd, verbose);
292  optimize_expand(datafile, size, rnd, verbose, false);
293  optimize_expand(datafile, size, rnd, verbose, true);
294  optimize_trim(datafile, size, rnd, verbose);
295  optimize_refdict(datafile, size, rnd, verbose);
296  optimize_combine(datafile, size, rnd, verbose);
297  optimize_encpart(datafile, size, rnd, verbose);
298 }
299 
300 // Execute multiple passes in parallel and take the one with the best result.
301 // The amount of parallelism is hardcoded in order to retain deterministic
302 // behaviour.
303 void optimize_parallel(DataFile &datafile, size_t &size, rnd_t &rnd, bool verbose, int num_threads = 4)
304 {
305  std::vector<DataFile> datafiles;
306  std::vector<size_t> sizes;
307  std::vector<rnd_t> rnds;
308  std::vector<std::unique_ptr<std::thread> > threads;
309 
310  for (int i = 0; i < num_threads; i++)
311  {
312  datafiles.emplace_back(datafile);
313  sizes.emplace_back(size);
314  rnds.emplace_back(rnd());
315  }
316 
317  for (int i = 0; i < num_threads; i++)
318  {
319  threads.emplace_back(new std::thread(optimize_pass,
320  std::ref(datafiles.at(i)),
321  std::ref(sizes.at(i)),
322  std::ref(rnds.at(i)),
323  verbose));
324  }
325 
326  for (int i = 0; i < num_threads; i++)
327  {
328  threads.at(i)->join();
329  }
330 
331  int best = std::min_element(sizes.begin(), sizes.end()) - sizes.begin();
332  size = sizes.at(best);
333  datafile = datafiles.at(best);
334 }
335 
336 // Go through all the dictionary entries and check what it costs to remove
337 // them. Removes any entries with negative or zero score.
338 void update_scores(DataFile &datafile, bool verbose)
339 {
340  size_t oldsize = get_encoded_size(datafile);
341 
342  for (size_t i = 0; i < DataFile::dictionarysize; i++)
343  {
344  DataFile trial = datafile;
345  DataFile::dictentry_t dummy = {};
346  trial.SetDictionaryEntry(i, dummy);
347  size_t newsize = get_encoded_size(trial);
348 
349  DataFile::dictentry_t d = datafile.GetDictionaryEntry(i);
350  d.score = newsize - oldsize;
351 
352  if (d.score > 0)
353  {
354  datafile.SetDictionaryEntry(i, d);
355  }
356  else
357  {
358  datafile.SetDictionaryEntry(i, dummy);
359 
360  if (verbose && d.replacement.size() != 0)
361  std::cout << "update_scores: dropped " << i
362  << " score " << -d.score << std::endl;
363  }
364  }
365 }
366 
367 void init_dictionary(DataFile &datafile)
368 {
369  rnd_t rnd(datafile.GetSeed());
370 
371  if (datafile.GetGlyphCount() == 0)
372  return;
373 
374  std::set<DataFile::pixels_t> seen_substrings;
375  std::set<DataFile::pixels_t> added_substrings;
376 
377  size_t i = 0;
378  while (i < DataFile::dictionarysize)
379  {
380  DataFile::pixels_t substring = *random_substring(datafile, rnd);
381 
382  if (!seen_substrings.count(substring))
383  {
384  seen_substrings.insert(substring);
385  }
386  else if (!added_substrings.count(substring))
387  {
388  // When we see a substring second time, add it.
389  DataFile::dictentry_t d;
390  d.score = 0;
391  d.replacement = substring;
392  datafile.SetDictionaryEntry(i, d);
393  i++;
394  added_substrings.insert(substring);
395  }
396  }
397 }
398 
399 void optimize(DataFile &datafile, size_t iterations)
400 {
401  bool verbose = false;
402  rnd_t rnd(datafile.GetSeed());
403 
404  update_scores(datafile, verbose);
405 
406  size_t size = get_encoded_size(datafile);
407 
408  for (size_t i = 0; i < iterations; i++)
409  {
410  optimize_parallel(datafile, size, rnd, verbose);
411  }
412 
413  std::uniform_int_distribution<size_t> dist(0, std::numeric_limits<uint32_t>::max());
414  datafile.SetSeed(dist(rnd));
415 }
416 
417 }}