GPAK  1.0.0
A general-purpose archive library

◆ _gpak_compressor_generate_dictionary()

GPAK_API int32_t _gpak_compressor_generate_dictionary ( gpak_t _pak)
Brief description: Generates a compression dictionary for the specified G-PAK archive.
This function generates a compression dictionary for the specified G-PAK archive, which can be used to improve the compression ratio for certain algorithms.
Parameters
_pak: A pointer to the gpak_t.
Returns
A non-negative value if the dictionary generation is successful, or a negative value if an error occurred.

Definition at line 358 of file gpak_compressors.c.

359 {
360  char* samples = NULL;
361  size_t sample_sizes[_DICTIONARY_SAMPLE_COUNT];
362  size_t samples_count = 0ull;
363  size_t samples_capacity = 0ull;
364  size_t current_offset = 0ull;
365 
367  filesystem_tree_node_t* next_directory = _pak->root_;
368  do
369  {
370  if (samples_count >= _DICTIONARY_SAMPLE_COUNT - 1)
371  break;
372 
373  filesystem_tree_file_t* next_file = NULL;
374  while ((next_file = filesystem_iterator_next_file(iterator)))
375  {
376  FILE* _infile = fopen(next_file->path_, "rb");
377 
378  fseek(_infile, 0, SEEK_END);
379  sample_sizes[samples_count] = ftell(_infile);
380  fseek(_infile, 0, SEEK_SET);
381 
382  samples_capacity += sample_sizes[samples_count];
383  samples = (char*)realloc(samples, samples_capacity);
384 
385  _freadb(samples + current_offset, 1ull, sample_sizes[samples_count], _infile);
386 
387  current_offset += sample_sizes[samples_count];
388 
389  fclose(_infile);
390 
391  if (samples_count >= _DICTIONARY_SAMPLE_COUNT - 1)
392  break;
393 
394  ++samples_count;
395  }
396  } while ((next_directory = filesystem_iterator_next_directory(iterator)));
397 
398  filesystem_iterator_free(iterator);
399 
400  size_t average_file_size = 0ull;
401 
402  for (size_t idx = 0ull; idx < samples_count; ++idx)
403  average_file_size += sample_sizes[idx] / samples_count;
404 
405  size_t nearest_pow_of_2 = 1;
406  while (nearest_pow_of_2 < average_file_size)
407  nearest_pow_of_2 *= 2;
408 
409  _pak->header_.dictionary_size_ = nearest_pow_of_2;
410 
411  _pak->dictionary_ = (char*)malloc(_pak->header_.dictionary_size_);
412  _pak->header_.dictionary_size_ = ZDICT_trainFromBuffer(_pak->dictionary_, _pak->header_.dictionary_size_, samples, sample_sizes, samples_count);
413  _pak->dictionary_ = (char*)realloc(_pak->dictionary_, _pak->header_.dictionary_size_);
414 
415  fseek(_pak->stream_, sizeof(pak_header_t), SEEK_SET);
416 
417  _fwriteb(_pak->dictionary_, 1ull, _pak->header_.dictionary_size_, _pak->stream_);
418 
419  free(samples);
420 
421  return _gpak_make_error(_pak, GPAK_ERROR_OK);
422 }
GPAK_API filesystem_tree_node_t * filesystem_iterator_next_directory(filesystem_tree_iterator_t *_iterator)
GPAK_API void filesystem_iterator_free(filesystem_tree_iterator_t *_iterator)
GPAK_API filesystem_tree_iterator_t * filesystem_iterator_create(filesystem_tree_node_t *_root)
GPAK_API filesystem_tree_file_t * filesystem_iterator_next_file(filesystem_tree_iterator_t *_iterator)
@ GPAK_ERROR_OK
Definition: gpak_data.h:155
uint32_t dictionary_size_
Definition: gpak_data.h:116
char * dictionary_
Definition: gpak_data.h:262
struct filesystem_tree_node * root_
Definition: gpak_data.h:260
FILE * stream_
Definition: gpak_data.h:259
pak_header_t header_
Definition: gpak_data.h:258

References gpak::dictionary_, gpak_header::dictionary_size_, filesystem_iterator_create(), filesystem_iterator_free(), filesystem_iterator_next_directory(), filesystem_iterator_next_file(), GPAK_ERROR_OK, gpak::header_, filesystem_tree_file::path_, gpak::root_, and gpak::stream_.

Referenced by gpak_close().