LCOV - code coverage report
Current view: top level - lib/tdb/common - freelist.c (source / functions) Hit Total Coverage
Test: coverage report for abartlet/fix-coverage dd10fb34 Lines: 181 262 69.1 %
Date: 2021-09-23 10:06:22 Functions: 14 16 87.5 %

          Line data    Source code
       1             :  /*
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    trivial database library
       5             : 
       6             :    Copyright (C) Andrew Tridgell              1999-2005
       7             :    Copyright (C) Paul `Rusty' Russell              2000
       8             :    Copyright (C) Jeremy Allison                    2000-2003
       9             : 
      10             :      ** NOTE! The following LGPL license applies to the tdb
      11             :      ** library. This does NOT imply that all of Samba is released
      12             :      ** under the LGPL
      13             : 
      14             :    This library is free software; you can redistribute it and/or
      15             :    modify it under the terms of the GNU Lesser General Public
      16             :    License as published by the Free Software Foundation; either
      17             :    version 3 of the License, or (at your option) any later version.
      18             : 
      19             :    This library is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      22             :    Lesser General Public License for more details.
      23             : 
      24             :    You should have received a copy of the GNU Lesser General Public
      25             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      26             : */
      27             : 
      28             : #include "tdb_private.h"
      29             : 
      30             : /* read a freelist record and check for simple errors */
      31   420115495 : int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct tdb_record *rec)
      32             : {
      33   420115495 :         if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
      34           0 :                 return -1;
      35             : 
      36   420115495 :         if (rec->magic == TDB_MAGIC) {
      37             :                 /* this happens when a app is showdown while deleting a record - we should
      38             :                    not completely fail when this happens */
      39           0 :                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%u - fixing\n",
      40             :                          rec->magic, off));
      41           0 :                 rec->magic = TDB_FREE_MAGIC;
      42           0 :                 if (tdb_rec_write(tdb, off, rec) == -1)
      43           0 :                         return -1;
      44             :         }
      45             : 
      46   420115495 :         if (rec->magic != TDB_FREE_MAGIC) {
      47             :                 /* Ensure ecode is set for log fn. */
      48           0 :                 tdb->ecode = TDB_ERR_CORRUPT;
      49           0 :                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%u\n",
      50             :                            rec->magic, off));
      51           0 :                 return -1;
      52             :         }
      53   420115495 :         if (tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0)
      54           0 :                 return -1;
      55   380246181 :         return 0;
      56             : }
      57             : 
      58             : /* update a record tailer (must hold allocation lock) */
      59   115693760 : static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
      60             :                          const struct tdb_record *rec)
      61             : {
      62             :         tdb_off_t totalsize;
      63             : 
      64             :         /* Offset of tailer from record header */
      65   121758133 :         totalsize = sizeof(*rec) + rec->rec_len;
      66   121758133 :         return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
      67             :                          &totalsize);
      68             : }
      69             : 
      70             : /**
      71             :  * Read the record directly on the left.
      72             :  * Fail if there is no record on the left.
      73             :  */
      74   368079179 : static int read_record_on_left(struct tdb_context *tdb, tdb_off_t rec_ptr,
      75             :                                tdb_off_t *left_p,
      76             :                                struct tdb_record *left_r)
      77             : {
      78             :         tdb_off_t left_ptr;
      79             :         tdb_off_t left_size;
      80             :         struct tdb_record left_rec;
      81             :         int ret;
      82             : 
      83   368079179 :         left_ptr = rec_ptr - sizeof(tdb_off_t);
      84             : 
      85   368079179 :         if (left_ptr <= TDB_DATA_START(tdb->hash_size)) {
      86             :                 /* no record on the left */
      87    46884378 :                 return -1;
      88             :         }
      89             : 
      90             :         /* Read in tailer and jump back to header */
      91   319370330 :         ret = tdb_ofs_read(tdb, left_ptr, &left_size);
      92   319370330 :         if (ret == -1) {
      93           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL,
      94             :                         "tdb_free: left offset read failed at %u\n", left_ptr));
      95           0 :                 return -1;
      96             :         }
      97             : 
      98             :         /* it could be uninitialised data */
      99   319370330 :         if (left_size == 0 || left_size == TDB_PAD_U32) {
     100     3233485 :                 return -1;
     101             :         }
     102             : 
     103   315948282 :         if (left_size > rec_ptr) {
     104           0 :                 return -1;
     105             :         }
     106             : 
     107   315948282 :         left_ptr = rec_ptr - left_size;
     108             : 
     109   315948282 :         if (left_ptr < TDB_DATA_START(tdb->hash_size)) {
     110           0 :                 return -1;
     111             :         }
     112             : 
     113             :         /* Now read in the left record */
     114   578900060 :         ret = tdb->methods->tdb_read(tdb, left_ptr, &left_rec,
     115   315948282 :                                      sizeof(left_rec), DOCONV());
     116   315948282 :         if (ret == -1) {
     117           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL,
     118             :                          "tdb_free: left read failed at %u (%u)\n",
     119             :                          left_ptr, left_size));
     120           0 :                 return -1;
     121             :         }
     122             : 
     123   315948282 :         *left_p = left_ptr;
     124   315948282 :         *left_r = left_rec;
     125             : 
     126   315948282 :         return 0;
     127             : }
     128             : 
     129             : /**
     130             :  * Merge new freelist record with the direct left neighbour.
     131             :  * This assumes that left_rec represents the record
     132             :  * directly to the left of right_rec and that this is
     133             :  * a freelist record.
     134             :  */
     135     2862213 : static int merge_with_left_record(struct tdb_context *tdb,
     136             :                                   tdb_off_t left_ptr,
     137             :                                   struct tdb_record *left_rec,
     138             :                                   struct tdb_record *right_rec)
     139             : {
     140             :         int ret;
     141             : 
     142     2862213 :         left_rec->rec_len += sizeof(*right_rec) + right_rec->rec_len;
     143             : 
     144     2862213 :         ret = tdb_rec_write(tdb, left_ptr, left_rec);
     145     2862213 :         if (ret == -1) {
     146           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL,
     147             :                          "merge_with_left_record: update_left failed at %u\n",
     148             :                          left_ptr));
     149           0 :                 return -1;
     150             :         }
     151             : 
     152     2921998 :         ret = update_tailer(tdb, left_ptr, left_rec);
     153     2862213 :         if (ret == -1) {
     154           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL,
     155             :                          "merge_with_left_record: update_tailer failed at %u\n",
     156             :                          left_ptr));
     157           0 :                 return -1;
     158             :         }
     159             : 
     160     2802428 :         return 0;
     161             : }
     162             : 
     163             : /**
     164             :  * Check whether the record left of a given freelist record is
     165             :  * also a freelist record, and if so, merge the two records.
     166             :  *
     167             :  * Return code:
     168             :  *  -1 upon error
     169             :  *   0 if left was not a free record
     170             :  *   1 if left was free and successfully merged.
     171             :  *
     172             :  * The current record is handed in with pointer and fully read record.
     173             :  *
     174             :  * The left record pointer and struct can be retrieved as result
     175             :  * in lp and lr;
     176             :  */
     177   368079179 : static int check_merge_with_left_record(struct tdb_context *tdb,
     178             :                                         tdb_off_t rec_ptr,
     179             :                                         struct tdb_record *rec,
     180             :                                         tdb_off_t *lp,
     181             :                                         struct tdb_record *lr)
     182             : {
     183             :         tdb_off_t left_ptr;
     184             :         struct tdb_record left_rec;
     185             :         int ret;
     186             : 
     187   368079179 :         ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec);
     188   368079179 :         if (ret != 0) {
     189    50117863 :                 return 0;
     190             :         }
     191             : 
     192   315948282 :         if (left_rec.magic != TDB_FREE_MAGIC) {
     193   277954119 :                 return 0;
     194             :         }
     195             : 
     196             :         /* It's free - expand to include it. */
     197     2862213 :         ret = merge_with_left_record(tdb, left_ptr, &left_rec, rec);
     198     2862213 :         if (ret != 0) {
     199           0 :                 return -1;
     200             :         }
     201             : 
     202     2862213 :         if (lp != NULL) {
     203      651637 :                 *lp = left_ptr;
     204             :         }
     205             : 
     206     2862213 :         if (lr != NULL) {
     207      651637 :                 *lr = left_rec;
     208             :         }
     209             : 
     210     2802428 :         return 1;
     211             : }
     212             : 
     213             : /**
     214             :  * Check whether the record left of a given freelist record is
     215             :  * also a freelist record, and if so, merge the two records.
     216             :  *
     217             :  * Return code:
     218             :  *  -1 upon error
     219             :  *   0 if left was not a free record
     220             :  *   1 if left was free and successfully merged.
     221             :  *
     222             :  * In this variant, the input record is specified just as the pointer
     223             :  * and is read from the database if needed.
     224             :  *
     225             :  * next_ptr will contain the original record's next pointer after
     226             :  * successful merging (which will be lost after merging), so that
     227             :  * the caller can update the last pointer.
     228             :  */
     229           0 : static int check_merge_ptr_with_left_record(struct tdb_context *tdb,
     230             :                                             tdb_off_t rec_ptr,
     231             :                                             tdb_off_t *next_ptr)
     232             : {
     233             :         tdb_off_t left_ptr;
     234             :         struct tdb_record rec, left_rec;
     235             :         int ret;
     236             : 
     237           0 :         ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec);
     238           0 :         if (ret != 0) {
     239           0 :                 return 0;
     240             :         }
     241             : 
     242           0 :         if (left_rec.magic != TDB_FREE_MAGIC) {
     243           0 :                 return 0;
     244             :         }
     245             : 
     246             :         /* It's free - expand to include it. */
     247             : 
     248           0 :         ret = tdb->methods->tdb_read(tdb, rec_ptr, &rec,
     249           0 :                                      sizeof(rec), DOCONV());
     250           0 :         if (ret != 0) {
     251           0 :                 return -1;
     252             :         }
     253             : 
     254           0 :         ret = merge_with_left_record(tdb, left_ptr, &left_rec, &rec);
     255           0 :         if (ret != 0) {
     256           0 :                 return -1;
     257             :         }
     258             : 
     259           0 :         if (next_ptr != NULL) {
     260           0 :                 *next_ptr = rec.next;
     261             :         }
     262             : 
     263           0 :         return 1;
     264             : }
     265             : 
     266             : /**
     267             :  * Add an element into the freelist.
     268             :  *
     269             :  * We merge the new record into the left record if it is also a
     270             :  * free record, but not with the right one. This makes the
     271             :  * operation O(1) instead of O(n): merging with the right record
     272             :  * requires a traverse of the freelist to find the previous
     273             :  * record in the free list.
     274             :  *
     275             :  * This prevents db traverses from being O(n^2) after a lot of deletes.
     276             :  */
     277     5648026 : int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
     278             : {
     279             :         int ret;
     280             : 
     281             :         /* Allocation and tailer lock */
     282     5648026 :         if (tdb_lock(tdb, -1, F_WRLCK) != 0)
     283           0 :                 return -1;
     284             : 
     285             :         /* set an initial tailer, so if we fail we don't leave a bogus record */
     286     5921352 :         if (update_tailer(tdb, offset, rec) != 0) {
     287           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
     288           0 :                 goto fail;
     289             :         }
     290             : 
     291     5648026 :         ret = check_merge_with_left_record(tdb, offset, rec, NULL, NULL);
     292     5648026 :         if (ret == -1) {
     293           0 :                 goto fail;
     294             :         }
     295     5648026 :         if (ret == 1) {
     296             :                 /* merged */
     297     2177669 :                 goto done;
     298             :         }
     299             : 
     300             :         /* Nothing to merge, prepend to free list */
     301             : 
     302     3437450 :         rec->magic = TDB_FREE_MAGIC;
     303             : 
     304     6874900 :         if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
     305     6874900 :             tdb_rec_write(tdb, offset, rec) == -1 ||
     306     3437450 :             tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
     307           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%u\n", offset));
     308           0 :                 goto fail;
     309             :         }
     310             : 
     311     3470357 : done:
     312             :         /* And we're done. */
     313     5648026 :         tdb_unlock(tdb, -1, F_WRLCK);
     314     5648026 :         return 0;
     315             : 
     316           0 :  fail:
     317           0 :         tdb_unlock(tdb, -1, F_WRLCK);
     318           0 :         return -1;
     319             : }
     320             : 
     321             : 
     322             : 
     323             : /*
     324             :    the core of tdb_allocate - called when we have decided which
     325             :    free list entry to use
     326             : 
     327             :    Note that we try to allocate by grabbing data from the end of an existing record,
     328             :    not the beginning. This is so the left merge in a free is more likely to be
     329             :    able to free up the record without fragmentation
     330             :  */
     331    57421654 : static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb,
     332             :                                   tdb_len_t length, tdb_off_t rec_ptr,
     333             :                                   struct tdb_record *rec, tdb_off_t last_ptr)
     334             : {
     335             : #define MIN_REC_SIZE (sizeof(struct tdb_record) + sizeof(tdb_off_t) + 8)
     336             : 
     337    57421654 :         if (rec->rec_len < length + MIN_REC_SIZE) {
     338             :                 /* we have to grab the whole record */
     339             : 
     340             :                 /* unlink it from the previous record */
     341      797707 :                 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
     342           0 :                         return 0;
     343             :                 }
     344             : 
     345             :                 /* mark it not free */
     346      797707 :                 rec->magic = TDB_MAGIC;
     347      797707 :                 if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
     348           0 :                         return 0;
     349             :                 }
     350      797707 :                 return rec_ptr;
     351             :         }
     352             : 
     353             :         /* we're going to just shorten the existing record */
     354    56623947 :         rec->rec_len -= (length + sizeof(*rec));
     355    56623947 :         if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
     356           0 :                 return 0;
     357             :         }
     358    59489578 :         if (update_tailer(tdb, rec_ptr, rec) == -1) {
     359           0 :                 return 0;
     360             :         }
     361             : 
     362             :         /* and setup the new record */
     363    56623947 :         rec_ptr += sizeof(*rec) + rec->rec_len;
     364             : 
     365    56623947 :         memset(rec, '\0', sizeof(*rec));
     366    56623947 :         rec->rec_len = length;
     367    56623947 :         rec->magic = TDB_MAGIC;
     368             : 
     369    56623947 :         if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
     370           0 :                 return 0;
     371             :         }
     372             : 
     373    59489578 :         if (update_tailer(tdb, rec_ptr, rec) == -1) {
     374           0 :                 return 0;
     375             :         }
     376             : 
     377    56623947 :         return rec_ptr;
     378             : }
     379             : 
     380             : /* allocate some space from the free list. The offset returned points
     381             :    to a unconnected tdb_record within the database with room for at
     382             :    least length bytes of total data
     383             : 
     384             :    0 is returned if the space could not be allocated
     385             :  */
     386    57421655 : static tdb_off_t tdb_allocate_from_freelist(
     387             :         struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec)
     388             : {
     389             :         tdb_off_t rec_ptr, last_ptr, newrec_ptr;
     390             :         struct tdb_chainwalk_ctx chainwalk;
     391             :         bool modified;
     392             :         struct {
     393             :                 tdb_off_t rec_ptr, last_ptr;
     394             :                 tdb_len_t rec_len;
     395             :         } bestfit;
     396    57421655 :         float multiplier = 1.0;
     397             :         bool merge_created_candidate;
     398             : 
     399             :         /* over-allocate to reduce fragmentation */
     400    57421655 :         length *= 1.25;
     401             : 
     402             :         /* Extra bytes required for tailer */
     403    57421655 :         length += sizeof(tdb_off_t);
     404    57421655 :         length = TDB_ALIGN(length, TDB_ALIGNMENT);
     405             : 
     406    56434183 :  again:
     407    59329439 :         merge_created_candidate = false;
     408    59329439 :         last_ptr = FREELIST_TOP;
     409             : 
     410             :         /* read in the freelist top */
     411    59329439 :         if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
     412           0 :                 return 0;
     413             : 
     414    59329439 :         modified = false;
     415    59329439 :         tdb_chainwalk_init(&chainwalk, rec_ptr);
     416             : 
     417    59329439 :         bestfit.rec_ptr = 0;
     418    59329439 :         bestfit.last_ptr = 0;
     419    59329439 :         bestfit.rec_len = 0;
     420             : 
     421             :         /*
     422             :            this is a best fit allocation strategy. Originally we used
     423             :            a first fit strategy, but it suffered from massive fragmentation
     424             :            issues when faced with a slowly increasing record size.
     425             :          */
     426   472414729 :         while (rec_ptr) {
     427             :                 int ret;
     428             :                 tdb_off_t left_ptr;
     429             :                 struct tdb_record left_rec;
     430             : 
     431   362431153 :                 if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
     432           1 :                         return 0;
     433             :                 }
     434             : 
     435   362431153 :                 ret = check_merge_with_left_record(tdb, rec_ptr, rec,
     436             :                                                    &left_ptr, &left_rec);
     437   362431153 :                 if (ret == -1) {
     438           0 :                         return 0;
     439             :                 }
     440   362431153 :                 if (ret == 1) {
     441             :                         /* merged */
     442      651637 :                         rec_ptr = rec->next;
     443      651637 :                         ret = tdb_ofs_write(tdb, last_ptr, &rec->next);
     444      651637 :                         if (ret == -1) {
     445           0 :                                 return 0;
     446             :                         }
     447             : 
     448             :                         /*
     449             :                          * We have merged the current record into the left
     450             :                          * neighbour. So our traverse of the freelist will
     451             :                          * skip it and consider the next record in the chain.
     452             :                          *
     453             :                          * But the enlarged left neighbour may be a candidate.
     454             :                          * If it is, we can not directly use it, though.
     455             :                          * The only thing we can do and have to do here is to
     456             :                          * update the current best fit size in the chain if the
     457             :                          * current best fit is the left record. (By that we may
     458             :                          * worsen the best fit we already had, bit this is not a
     459             :                          * problem.)
     460             :                          *
     461             :                          * If the current best fit is not the left record,
     462             :                          * all we can do is remember the fact that a merge
     463             :                          * created a new candidate so that we can trigger
     464             :                          * a second walk of the freelist if at the end of
     465             :                          * the first walk we have not found any fit.
     466             :                          * This way we can avoid expanding the database.
     467             :                          */
     468             : 
     469      651637 :                         if (bestfit.rec_ptr == left_ptr) {
     470       18673 :                                 bestfit.rec_len = left_rec.rec_len;
     471             :                         }
     472             : 
     473      651637 :                         if (left_rec.rec_len > length) {
     474      609161 :                                 merge_created_candidate = true;
     475             :                         }
     476             : 
     477      651637 :                         modified = true;
     478             : 
     479      651637 :                         continue;
     480             :                 }
     481             : 
     482   361779516 :                 if (rec->rec_len >= length) {
     483    65074898 :                         if (bestfit.rec_ptr == 0 ||
     484     4042538 :                             rec->rec_len < bestfit.rec_len) {
     485    59110780 :                                 bestfit.rec_len = rec->rec_len;
     486    59110780 :                                 bestfit.rec_ptr = rec_ptr;
     487    59110780 :                                 bestfit.last_ptr = last_ptr;
     488             :                         }
     489             :                 }
     490             : 
     491             :                 /* move to the next record */
     492   361779516 :                 last_ptr = rec_ptr;
     493   361779516 :                 rec_ptr = rec->next;
     494             : 
     495   361779516 :                 if (!modified) {
     496             :                         bool ok;
     497   330478945 :                         ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
     498   330478945 :                         if (!ok) {
     499           1 :                                 return 0;
     500             :                         }
     501             :                 }
     502             : 
     503             :                 /* if we've found a record that is big enough, then
     504             :                    stop searching if its also not too big. The
     505             :                    definition of 'too big' changes as we scan
     506             :                    through */
     507   475827430 :                 if (bestfit.rec_len > 0 &&
     508   128504560 :                     bestfit.rec_len < length * multiplier) {
     509     1475657 :                         break;
     510             :                 }
     511             : 
     512             :                 /* this multiplier means we only extremely rarely
     513             :                    search more than 50 or so records. At 50 records we
     514             :                    accept records up to 11 times larger than what we
     515             :                    want */
     516   360167881 :                 multiplier *= 1.05;
     517             :         }
     518             : 
     519    59329438 :         if (bestfit.rec_ptr != 0) {
     520    57421654 :                 if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
     521           0 :                         return 0;
     522             :                 }
     523             : 
     524    57421654 :                 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr,
     525             :                                               rec, bestfit.last_ptr);
     526    57421654 :                 return newrec_ptr;
     527             :         }
     528             : 
     529     1907784 :         if (merge_created_candidate) {
     530        2047 :                 goto again;
     531             :         }
     532             : 
     533             :         /* we didn't find enough space. See if we can expand the
     534             :            database and if we can then try again */
     535     1905736 :         if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
     536     1796044 :                 goto again;
     537             : 
     538           0 :         return 0;
     539             : }
     540             : 
     541      679975 : static bool tdb_alloc_dead(
     542             :         struct tdb_context *tdb, int hash, tdb_len_t length,
     543             :         tdb_off_t *rec_ptr, struct tdb_record *rec)
     544             : {
     545             :         tdb_off_t last_ptr;
     546             : 
     547      679975 :         *rec_ptr = tdb_find_dead(tdb, hash, rec, length, &last_ptr);
     548      679975 :         if (*rec_ptr == 0) {
     549      366595 :                 return false;
     550             :         }
     551             :         /*
     552             :          * Unlink the record from the hash chain, it's about to be moved into
     553             :          * another one.
     554             :          */
     555      313310 :         return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0);
     556             : }
     557             : 
     558    54526399 : static void tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
     559             : {
     560    57421655 :         int max_dead_records = tdb->max_dead_records;
     561             : 
     562    57421655 :         tdb->max_dead_records = 0;
     563             : 
     564    57421655 :         tdb_trim_dead(tdb, hash);
     565             : 
     566    57421655 :         tdb->max_dead_records = max_dead_records;
     567    54526399 : }
     568             : 
     569             : /*
     570             :  * Chain "hash" is assumed to be locked
     571             :  */
     572             : 
     573    57734965 : tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length,
     574             :                        struct tdb_record *rec)
     575             : {
     576             :         tdb_off_t ret;
     577             :         uint32_t i;
     578             : 
     579    57734965 :         if (tdb->max_dead_records == 0) {
     580             :                 /*
     581             :                  * No dead records to expect anywhere. Do the blocking
     582             :                  * freelist lock without trying to steal from others
     583             :                  */
     584    54159804 :                 goto blocking_freelist_allocate;
     585             :         }
     586             : 
     587             :         /*
     588             :          * The following loop tries to get the freelist lock nonblocking. If
     589             :          * it gets the lock, allocate from there. If the freelist is busy,
     590             :          * instead of waiting we try to steal dead records from other hash
     591             :          * chains.
     592             :          *
     593             :          * Be aware that we do nonblocking locks on the other hash chains as
     594             :          * well and fail gracefully. This way we avoid deadlocks (we block two
     595             :          * hash chains, something which is pretty bad normally)
     596             :          */
     597             : 
     598      679200 :         for (i=0; i<tdb->hash_size; i++) {
     599             : 
     600             :                 int list;
     601             : 
     602      679975 :                 list = BUCKET(hash+i);
     603             : 
     604      679975 :                 if (tdb_lock_nonblock(tdb, list, F_WRLCK) == 0) {
     605             :                         bool got_dead;
     606             : 
     607      679975 :                         got_dead = tdb_alloc_dead(tdb, list, length, &ret, rec);
     608      679975 :                         tdb_unlock(tdb, list, F_WRLCK);
     609             : 
     610      679975 :                         if (got_dead) {
     611      313310 :                                 return ret;
     612             :                         }
     613             :                 }
     614             : 
     615      366665 :                 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == 0) {
     616             :                         /*
     617             :                          * Under the freelist lock take the chance to give
     618             :                          * back our dead records.
     619             :                          */
     620      366735 :                         tdb_purge_dead(tdb, hash);
     621             : 
     622      366665 :                         ret = tdb_allocate_from_freelist(tdb, length, rec);
     623      366665 :                         tdb_unlock(tdb, -1, F_WRLCK);
     624      366665 :                         return ret;
     625             :                 }
     626             :         }
     627             : 
     628     2895186 : blocking_freelist_allocate:
     629             : 
     630    57054990 :         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
     631           0 :                 return 0;
     632             :         }
     633             :         /*
     634             :          * Dead records can happen even if max_dead_records==0, they
     635             :          * are older than the max_dead_records concept: They happen if
     636             :          * tdb_delete happens concurrently with a traverse.
     637             :          */
     638    59950176 :         tdb_purge_dead(tdb, hash);
     639    57054990 :         ret = tdb_allocate_from_freelist(tdb, length, rec);
     640    57054990 :         tdb_unlock(tdb, -1, F_WRLCK);
     641    57054990 :         return ret;
     642             : }
     643             : 
     644             : /**
     645             :  * Merge adjacent records in the freelist.
     646             :  */
     647           4 : static int tdb_freelist_merge_adjacent(struct tdb_context *tdb,
     648             :                                        int *count_records, int *count_merged)
     649             : {
     650             :         tdb_off_t cur, next;
     651           4 :         int count = 0;
     652           4 :         int merged = 0;
     653             :         int ret;
     654             : 
     655           4 :         ret = tdb_lock(tdb, -1, F_RDLCK);
     656           4 :         if (ret == -1) {
     657           0 :                 return -1;
     658             :         }
     659             : 
     660           2 :         cur = FREELIST_TOP;
     661           6 :         while (tdb_ofs_read(tdb, cur, &next) == 0 && next != 0) {
     662             :                 tdb_off_t next2;
     663             : 
     664           0 :                 count++;
     665             : 
     666           0 :                 ret = check_merge_ptr_with_left_record(tdb, next, &next2);
     667           0 :                 if (ret == -1) {
     668           0 :                         goto done;
     669             :                 }
     670           0 :                 if (ret == 1) {
     671             :                         /*
     672             :                          * merged:
     673             :                          * now let cur->next point to next2 instead of next
     674             :                          */
     675             : 
     676           0 :                         ret = tdb_ofs_write(tdb, cur, &next2);
     677           0 :                         if (ret != 0) {
     678           0 :                                 goto done;
     679             :                         }
     680             : 
     681           0 :                         next = next2;
     682           0 :                         merged++;
     683             :                 }
     684             : 
     685           0 :                 cur = next;
     686             :         }
     687             : 
     688           4 :         if (count_records != NULL) {
     689           4 :                 *count_records = count;
     690             :         }
     691             : 
     692           4 :         if (count_merged != NULL) {
     693           0 :                 *count_merged = merged;
     694             :         }
     695             : 
     696           2 :         ret = 0;
     697             : 
     698           4 : done:
     699           4 :         tdb_unlock(tdb, -1, F_RDLCK);
     700           4 :         return ret;
     701             : }
     702             : 
     703             : /**
     704             :  * return the size of the freelist - no merging done
     705             :  */
     706           0 : static int tdb_freelist_size_no_merge(struct tdb_context *tdb)
     707             : {
     708             :         tdb_off_t ptr;
     709           0 :         int count=0;
     710             : 
     711           0 :         if (tdb_lock(tdb, -1, F_RDLCK) == -1) {
     712           0 :                 return -1;
     713             :         }
     714             : 
     715           0 :         ptr = FREELIST_TOP;
     716           0 :         while (tdb_ofs_read(tdb, ptr, &ptr) == 0 && ptr != 0) {
     717           0 :                 count++;
     718             :         }
     719             : 
     720           0 :         tdb_unlock(tdb, -1, F_RDLCK);
     721           0 :         return count;
     722             : }
     723             : 
     724             : /**
     725             :  * return the size of the freelist - used to decide if we should repack
     726             :  *
     727             :  * As a side effect, adjacent records are merged unless the
     728             :  * database is read-only, in order to reduce the fragmentation
     729             :  * without repacking.
     730             :  */
     731           4 : _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb)
     732             : {
     733             : 
     734           4 :         int count = 0;
     735             : 
     736           4 :         if (tdb->read_only) {
     737           0 :                 count = tdb_freelist_size_no_merge(tdb);
     738             :         } else {
     739             :                 int ret;
     740           4 :                 ret = tdb_freelist_merge_adjacent(tdb, &count, NULL);
     741           4 :                 if (ret != 0) {
     742           0 :                         return -1;
     743             :                 }
     744             :         }
     745             : 
     746           4 :         return count;
     747             : }

Generated by: LCOV version 1.13