HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
H5ACpublic.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group. *
3  * Copyright by the Board of Trustees of the University of Illinois. *
4  * All rights reserved. *
5  * *
6  * This file is part of HDF5. The full HDF5 copyright notice, including *
7  * terms governing use, modification, and redistribution, is contained in *
8  * the files COPYING and Copyright.html. COPYING can be found at the root *
9  * of the source code distribution tree; Copyright.html can be found at the *
10  * root level of an installed copy of the electronic HDF5 document set and *
11  * is linked from the top-level documents page. It can also be found at *
12  * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
13  * access to either file, you may request a copy from help@hdfgroup.org. *
14  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /*-------------------------------------------------------------------------
17  *
18  * Created: H5ACpublic.h
19  * Jul 10 1997
20  * Robb Matzke <matzke@llnl.gov>
21  *
22  * Purpose: Public include file for cache functions.
23  *
24  * Modifications:
25  *
26  *-------------------------------------------------------------------------
27  */
28 #ifndef _H5ACpublic_H
29 #define _H5ACpublic_H
30 
31 /* Public headers needed by this file */
32 #include "H5public.h"
33 #include "H5Cpublic.h"
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 /****************************************************************************
40  *
41  * structure H5AC_cache_config_t
42  *
43  * H5AC_cache_config_t is a public structure intended for use in public APIs.
44  * At least in its initial incarnation, it is basically a copy of struct
45  * H5C_auto_size_ctl_t, minus the report_fcn field, and plus the
46  * dirty_bytes_threshold field.
47  *
48  * The report_fcn field is omitted, as including it would require us to
49  * make H5C_t structure public.
50  *
51  * The dirty_bytes_threshold field does not appear in H5C_auto_size_ctl_t,
52  * as synchronization between caches on different processes is handled at
53  * the H5AC level, not at the level of H5C. Note however that there is
54  * considerable interaction between this value and the other fields in this
55  * structure.
56  *
57  * Similarly, the open_trace_file, close_trace_file, and trace_file_name
58  * fields do not appear in H5C_auto_size_ctl_t, as most trace file
59  * issues are handled at the H5AC level. The one exception is storage of
60  * the pointer to the trace file, which is handled by H5C.
61  *
62  * The structure is in H5ACpublic.h as we may wish to allow different
63  * configuration options for metadata and raw data caches.
64  *
65  * The fields of the structure are discussed individually below:
66  *
67  * version: Integer field containing the version number of this version
68  * of the H5AC_cache_config_t structure. Any instance of
69  * H5AC_cache_config_t passed to the cache must have a known
70  * version number, or an error will be flagged.
71  *
72  * rpt_fcn_enabled: Boolean field used to enable and disable the default
73  * reporting function. This function is invoked every time the
74  * automatic cache resize code is run, and reports on its activities.
75  *
76  * This is a debugging function, and should normally be turned off.
77  *
78  * open_trace_file: Boolean field indicating whether the trace_file_name
79  * field should be used to open a trace file for the cache.
80  *
81  * The trace file is a debugging feature that allows the capture of
82  * top level metadata cache requests for purposes of debugging and/or
83  * optimization. This field should normally be set to FALSE, as
84  * trace file collection imposes considerable overhead.
85  *
86  * This field should only be set to TRUE when the trace_file_name
87  * contains the full path of the desired trace file, and either
88  * there is no open trace file on the cache, or the close_trace_file
89  * field is also TRUE.
90  *
91  * close_trace_file: Boolean field indicating whether the current trace
92  * file (if any) should be closed.
93  *
94  * See the above comments on the open_trace_file field. This field
95  * should be set to FALSE unless there is an open trace file on the
96  * cache that you wish to close.
97  *
98  * trace_file_name: Full path of the trace file to be opened if the
99  * open_trace_file field is TRUE.
100  *
101  * In the parallel case, an ascii representation of the mpi rank of
102  * the process will be appended to the file name to yield a unique
103  * trace file name for each process.
104  *
105  * The length of the path must not exceed H5AC__MAX_TRACE_FILE_NAME_LEN
106  * characters.
107  *
108  * evictions_enabled: Boolean field used to either report the current
109  * evictions enabled status of the cache, or to set the cache's
110  * evictions enabled status.
111  *
112  * In general, the metadata cache should always be allowed to
113  * evict entries. However, in some cases it is advantageous to
114  * disable evictions briefly, and thereby postpone metadata
115  * writes. However, this must be done with care, as the cache
116  * can grow quickly. If you do this, re-enable evictions as
117  * soon as possible and monitor cache size.
118  *
119  * At present, evictions can only be disabled if automatic
120  * cache resizing is also disabled (that is, ( incr_mode ==
121  * H5C_incr__off ) && ( decr_mode == H5C_decr__off )). There
122  * is no logical reason why this should be so, but it simplifies
123  * implementation and testing, and I can't think of any reason
124  * why it would be desirable. If you can think of one, I'll
125  * revisit the issue.
126  *
127  * set_initial_size: Boolean flag indicating whether the
128  * initial size of the cache is to be set to the value given in
129  * the initial_size field. If set_initial_size is FALSE, the
130  * initial_size field is ignored.
131  *
132  * initial_size: If enabled, this field contains the size the cache is
133  * to be set to upon receipt of this structure. Needless to say,
134  * initial_size must lie in the closed interval [min_size, max_size].
135  *
136  * min_clean_fraction: double in the range 0 to 1 indicating the fraction
137  * of the cache that is to be kept clean. This field is only used
138  * in parallel mode. Typical values are 0.1 to 0.5.
139  *
140  * max_size: Maximum size to which the cache can be adjusted. The
141  * supplied value must fall in the closed interval
142  * [H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]. Also, max_size must
143  * be greater than or equal to min_size.
144  *
145  * min_size: Minimum size to which the cache can be adjusted. The
146  * supplied value must fall in the closed interval
147  * [H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]. Also, min_size
148  * must be less than or equal to max_size.
149  *
150  * epoch_length: Number of accesses on the cache over which to collect
151  * hit rate stats before running the automatic cache resize code,
152  * if it is enabled.
153  *
154  * At the end of an epoch, we discard prior hit rate data and start
155  * collecting afresh. The epoch_length must lie in the closed
156  * interval [H5C__MIN_AR_EPOCH_LENGTH, H5C__MAX_AR_EPOCH_LENGTH].
157  *
158  *
159  * Cache size increase control fields:
160  *
161  * incr_mode: Instance of the H5C_cache_incr_mode enumerated type whose
162  * value indicates how we determine whether the cache size should be
163  * increased. At present there are two possible values:
164  *
165  * H5C_incr__off: Don't attempt to increase the size of the cache
166  * automatically.
167  *
168  * When this increment mode is selected, the remaining fields
169  * in the cache size increase section are ignored.
170  *
171  * H5C_incr__threshold: Attempt to increase the size of the cache
172  * whenever the average hit rate over the last epoch drops
173  * below the value supplied in the lower_hr_threshold
174  * field.
175  *
176  * Note that this attempt will fail if the cache is already
177  * at its maximum size, or if the cache is not already using
178  * all available space.
179  *
180  * Note that you must set incr_mode to H5C_incr__off if you
181  * disable metadata cache entry evictions.
182  *
183  * lower_hr_threshold: Lower hit rate threshold. If the increment mode
184  * (incr_mode) is H5C_incr__threshold and the hit rate drops below the
185  * value supplied in this field in an epoch, increment the cache size by
186  * size_increment. Note that cache size may not be incremented above
187  * max_size, and that the increment may be further restricted by the
188  * max_increment field if it is enabled.
189  *
190  * When enabled, this field must contain a value in the range [0.0, 1.0].
191  * Depending on the incr_mode selected, it may also have to be less than
192  * upper_hr_threshold.
193  *
194  * increment: Double containing the multiplier used to derive the new
195  * cache size from the old if a cache size increment is triggered.
196  * The increment must be greater than 1.0, and should not exceed 2.0.
197  *
198  * The new cache size is obtained by multiplying the current max cache
199  * size by the increment, and then clamping to max_size and to stay
200  * within the max_increment as necessary.
201  *
202  * apply_max_increment: Boolean flag indicating whether the max_increment
203  * field should be used to limit the maximum cache size increment.
204  *
205  * max_increment: If enabled by the apply_max_increment field described
206  * above, this field contains the maximum number of bytes by which the
207  * cache size can be increased in a single re-size.
208  *
209  * flash_incr_mode: Instance of the H5C_cache_flash_incr_mode enumerated
210  * type whose value indicates whether and by which algorithm we should
211  * make flash increases in the size of the cache to accommodate insertion
212  * of large entries and large increases in the size of a single entry.
213  *
214  * The addition of the flash increment mode was occasioned by performance
215  * problems that appear when a local heap is increased to a size in excess
216  * of the current cache size. While the existing re-size code dealt with
217  * this eventually, performance was very bad for the remainder of the
218  * epoch.
219  *
220  * At present, there are two possible values for the flash_incr_mode:
221  *
222  * H5C_flash_incr__off: Don't perform flash increases in the size of
223  * the cache.
224  *
225  * H5C_flash_incr__add_space: Let x be either the size of a newly
226  * inserted entry, or the number of bytes by which the
227  * size of an existing entry has been increased.
228  *
229  * If
230  * x > flash_threshold * current max cache size,
231  *
232  * increase the current maximum cache size by x * flash_multiple
233  * less any free space in the cache, and start a new epoch. For
234  * now at least, pay no attention to the maximum increment.
235  *
236  * In both of the above cases, the flash increment pays no attention to
237  * the maximum increment (at least in this first incarnation), but DOES
238  * stay within max_size.
239  *
240  * With a little thought, it should be obvious that the above flash
241  * cache size increase algorithm is not sufficient for all circumstances
242  * -- for example, suppose the user round robins through
243  * (1/flash_threshold) +1 groups, adding one data set to each on each
244  * pass. Then all will increase in size at about the same time, requiring
245  * the max cache size to at least double to maintain acceptable
246  * performance, however the above flash increment algorithm will not be
247  * triggered.
248  *
249  * Hopefully, the add space algorithms detailed above will be sufficient
250  * for the performance problems encountered to date. However, we should
251  * expect to revisit the issue.
252  *
253  * flash_multiple: Double containing the multiple described above in the
254  * H5C_flash_incr__add_space section of the discussion of the
255  * flash_incr_mode section. This field is ignored unless flash_incr_mode
256  * is H5C_flash_incr__add_space.
257  *
258  * flash_threshold: Double containing the factor by which current max cache
259  * size is multiplied to obtain the size threshold for the add_space flash
260  * increment algorithm. The field is ignored unless flash_incr_mode is
261  * H5C_flash_incr__add_space.
262  *
263  *
264  * Cache size decrease control fields:
265  *
266  * decr_mode: Instance of the H5C_cache_decr_mode enumerated type whose
267  * value indicates how we determine whether the cache size should be
268  * decreased. At present there are four possibilities.
269  *
270  * H5C_decr__off: Don't attempt to decrease the size of the cache
271  * automatically.
272  *
273  * When this decrement mode is selected, the remaining fields
274  * in the cache size decrease section are ignored.
275  *
276  * H5C_decr__threshold: Attempt to decrease the size of the cache
277  * whenever the average hit rate over the last epoch rises
278  * above the value supplied in the upper_hr_threshold
279  * field.
280  *
281  * H5C_decr__age_out: At the end of each epoch, search the cache for
282  * entries that have not been accessed for at least the number
283  * of epochs specified in the epochs_before_eviction field, and
284  * evict these entries. Conceptually, the maximum cache size
285  * is then decreased to match the new actual cache size. However,
286  * this reduction may be modified by the min_size, the
287  * max_decrement, and/or the empty_reserve.
288  *
289  * H5C_decr__age_out_with_threshold: Same as age_out, but we only
290  * attempt to reduce the cache size when the hit rate observed
291  * over the last epoch exceeds the value provided in the
292  * upper_hr_threshold field.
293  *
294  * Note that you must set decr_mode to H5C_decr__off if you
295  * disable metadata cache entry evictions.
296  *
297  * upper_hr_threshold: Upper hit rate threshold. The use of this field
298  * varies according to the current decr_mode:
299  *
300  * H5C_decr__off or H5C_decr__age_out: The value of this field is
301  * ignored.
302  *
303  * H5C_decr__threshold: If the hit rate exceeds this threshold in any
304  * epoch, attempt to decrement the cache size by size_decrement.
305  *
306  * Note that cache size may not be decremented below min_size.
307  *
308  * Note also that if the upper_hr_threshold is 1.0, the cache size
309  * will never be reduced.
310  *
311  * H5C_decr__age_out_with_threshold: If the hit rate exceeds this
312  * threshold in any epoch, attempt to reduce the cache size
313  * by evicting entries that have not been accessed for more
314  * than the specified number of epochs.
315  *
316  * decrement: This field is only used when the decr_mode is
317  * H5C_decr__threshold.
318  *
319  * The field is a double containing the multiplier used to derive the
320  * new cache size from the old if a cache size decrement is triggered.
321  * The decrement must be in the range 0.0 (in which case the cache will
322  * try to contract to its minimum size) to 1.0 (in which case the
323  * cache will never shrink).
324  *
325  * apply_max_decrement: Boolean flag used to determine whether decrements
326  * in cache size are to be limited by the max_decrement field.
327  *
328  * max_decrement: Maximum number of bytes by which the cache size can be
329  * decreased in a single re-size. Note that decrements may also be
330  * restricted by the min_size of the cache, and (in age out modes) by
331  * the empty_reserve field.
332  *
333  * epochs_before_eviction: Integer field used in H5C_decr__age_out and
334  * H5C_decr__age_out_with_threshold decrement modes.
335  *
336  * This field contains the number of epochs an entry must remain
337  * unaccessed before it is evicted in an attempt to reduce the
338  * cache size. If applicable, this field must lie in the range
339  * [1, H5C__MAX_EPOCH_MARKERS].
340  *
341  * apply_empty_reserve: Boolean field controlling whether the empty_reserve
342  * field is to be used in computing the new cache size when the
343  * decr_mode is H5C_decr__age_out or H5C_decr__age_out_with_threshold.
344  *
345  * empty_reserve: To avoid a constant ratcheting down of cache size by small
346  * amounts in the H5C_decr__age_out and H5C_decr__age_out_with_threshold
347  * modes, this field allows one to require that any cache size
348  * reductions leave the specified fraction of unused space in the cache.
349  *
350  * The value of this field must be in the range [0.0, 1.0]. I would
351  * expect typical values to be in the range of 0.01 to 0.1.
352  *
353  *
354  * Parallel Configuration Fields:
355  *
356  * In PHDF5, all operations that modify metadata must be executed collectively.
357  *
358  * We used to think that this was enough to ensure consistency across the
359  * metadata caches, but since we allow processes to read metadata individually,
360  * the order of dirty entries in the LRU list can vary across processes,
361  * which can result in inconsistencies between the caches.
362  *
363  * PHDF5 uses several strategies to prevent such inconsistencies in metadata,
364  * all of which use the fact that the same stream of dirty metadata is seen
365  * by all processes for purposes of synchronization. This is done by
366  * having each process count the number of bytes of dirty metadata generated,
367  * and then running a "sync point" whenever this count exceeds a user
368  * specified threshold (see dirty_bytes_threshold below).
369  *
370  * The current metadata write strategy is indicated by the
371  * metadata_write_strategy field. The possible values of this field, along
372  * with the associated metadata write strategies are discussed below.
373  *
374  * dirty_bytes_threshold: Threshold of dirty byte creation used to
375  * synchronize updates between caches. (See above for outline and
376  * motivation.)
377  *
378  * This value MUST be consistent across all processes accessing the
379  * file. This field is ignored unless HDF5 has been compiled for
380  * parallel.
381  *
382  * metadata_write_strategy: Integer field containing a code indicating the
383  * desired metadata write strategy. The valid values of this field
384  * are enumerated and discussed below:
385  *
386  *
387  * H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
388  *
389  * When metadata_write_strategy is set to this value, only process
390  * zero is allowed to write dirty metadata to disk. All other
391  * processes must retain dirty metadata until they are informed at
392  * a sync point that the dirty metadata in question has been written
393  * to disk.
394  *
395  * When the sync point is reached (or when there is a user generated
396  * flush), process zero flushes sufficient entries to bring it into
397  * compliance with its min clean size (or flushes all dirty entries in
398  * the case of a user generated flush), broadcasts the list of
399  * entries just cleaned to all the other processes, and then exits
400  * the sync point.
401  *
402  * Upon receipt of the broadcast, the other processes mark the indicated
403  * entries as clean, and leave the sync point as well.
404  *
405  *
406  * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
407  *
408  * In the distributed metadata write strategy, process zero still makes
409  * the decisions as to what entries should be flushed, but the actual
410  * flushes are distributed across the processes in the computation to
411  * the extent possible.
412  *
413  * In this strategy, when a sync point is triggered (either by dirty
414  * metadata creation or manual flush), all processes enter a barrier.
415  *
416  * On the other side of the barrier, process 0 constructs an ordered
417  * list of the entries to be flushed, and then broadcasts this list
418  * to the caches in all the processes.
419  *
420  * All processes then scan the list of entries to be flushed, flushing
421  * some, and marking the rest as clean. The algorithm for this purpose
422  * ensures that each entry in the list is flushed exactly once, and
423  * all are marked clean in each cache.
424  *
425  * Note that in the case of a flush of the cache, no message passing
426  * is necessary, as all processes have the same list of dirty entries,
427  * and all of these entries must be flushed. Thus in this case it is
428  * sufficient for each process to sort its list of dirty entries after
429  * leaving the initial barrier, and use this list as if it had been
430  * received from process zero.
431  *
432  * To avoid possible messages from the past/future, all caches must
433  * wait until all caches are done before leaving the sync point.
434  *
435  ****************************************************************************/
436 
437 #define H5AC__CURR_CACHE_CONFIG_VERSION 1
438 #define H5AC__MAX_TRACE_FILE_NAME_LEN 1024
439 
440 #define H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY 0
441 #define H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED 1
442 
443 typedef struct H5AC_cache_config_t
444 {
445  /* general configuration fields: */
446  int version;
447 
449 
453 
455 
457  size_t initial_size;
458 
460 
461  size_t max_size;
462  size_t min_size;
463 
464  long int epoch_length;
465 
466 
467  /* size increase control fields: */
469 
471 
472  double increment;
473 
476 
480 
481 
482  /* size decrease control fields: */
484 
486 
487  double decrement;
488 
491 
493 
496 
497 
498  /* parallel configuration fields: */
501 
503 
504 
505 #ifdef __cplusplus
506 }
507 #endif
508 #endif
H5C_cache_flash_incr_mode
Definition: H5Cpublic.h:44
unsigned int hbool_t
Definition: H5public.h:125
hbool_t rpt_fcn_enabled
Definition: H5ACpublic.h:448
hbool_t apply_max_increment
Definition: H5ACpublic.h:474
long int epoch_length
Definition: H5ACpublic.h:464
H5C_cache_decr_mode
Definition: H5Cpublic.h:50
hbool_t set_initial_size
Definition: H5ACpublic.h:456
#define H5AC__MAX_TRACE_FILE_NAME_LEN
Definition: H5ACpublic.h:438
double min_clean_fraction
Definition: H5ACpublic.h:459
enum H5C_cache_incr_mode incr_mode
Definition: H5ACpublic.h:468
hbool_t open_trace_file
Definition: H5ACpublic.h:450
char trace_file_name[H5AC__MAX_TRACE_FILE_NAME_LEN+1]
Definition: H5ACpublic.h:452
hbool_t apply_empty_reserve
Definition: H5ACpublic.h:494
enum H5C_cache_decr_mode decr_mode
Definition: H5ACpublic.h:483
double lower_hr_threshold
Definition: H5ACpublic.h:470
enum H5C_cache_flash_incr_mode flash_incr_mode
Definition: H5ACpublic.h:477
hbool_t apply_max_decrement
Definition: H5ACpublic.h:489
hbool_t evictions_enabled
Definition: H5ACpublic.h:454
double upper_hr_threshold
Definition: H5ACpublic.h:485
H5C_cache_incr_mode
Definition: H5Cpublic.h:38
hbool_t close_trace_file
Definition: H5ACpublic.h:451
struct H5AC_cache_config_t H5AC_cache_config_t