diff options
Diffstat (limited to 'tqtinterface/qt4/src/3rdparty/sqlite/btree.c')
| -rw-r--r-- | tqtinterface/qt4/src/3rdparty/sqlite/btree.c | 3579 | 
1 files changed, 3579 insertions, 0 deletions
diff --git a/tqtinterface/qt4/src/3rdparty/sqlite/btree.c b/tqtinterface/qt4/src/3rdparty/sqlite/btree.c new file mode 100644 index 0000000..f3b92c6 --- /dev/null +++ b/tqtinterface/qt4/src/3rdparty/sqlite/btree.c @@ -0,0 +1,3579 @@ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you tqfind forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +************************************************************************* +** $Id: btree.c,v 1.102 2004/02/14 17:35:07 drh Exp $ +** +** This file implements a external (disk-based) database using BTrees. +** For a detailed discussion of BTrees, refer to +** +**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: +**     "Sorting And Searching", pages 473-480. Addison-Wesley +**     Publishing Company, Reading, Massachusetts. +** +** The basic idea is that each page of the file tqcontains N database +** entries and N+1 pointers to subpages. +** +**   ---------------------------------------------------------------- +**   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) | +**   ---------------------------------------------------------------- +** +** All of the keys on the page that Ptr(0) points to have values less +** than Key(0).  All of the keys on page Ptr(1) and its subpages have +** values greater than Key(0) and less than Key(1).  All of the keys +** on Ptr(N+1) and its subpages have values greater than Key(N).  And +** so forth. +** +** Finding a particular key requires reading O(log(M)) pages from the  +** disk where M is the number of entries in the tree. +** +** In this implementation, a single file can hold one or more separate  +** BTrees.  Each BTree is identified by the index of its root page.  The +** key and data for any entry are combined to form the "payload".  Up to +** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the +** database page.  If the payload is larger than MX_LOCAL_PAYLOAD bytes +** then surplus bytes are stored on overflow pages.  The payload for an +** entry and the preceding pointer are combined to form a "Cell".  Each  +** page has a small header which tqcontains the Ptr(N+1) pointer. +** +** The first page of the file tqcontains a magic string used to verify that +** the file really is a valid BTree database, a pointer to a list of unused +** pages in the file, and some meta information.  The root of the first +** BTree begins on page 2 of the file.  (Pages are numbered beginning with +** 1, not 0.)  Thus a minimum database tqcontains 2 pages. +*/ +#include "sqliteInt.h" +#include "pager.h" +#include "btree.h" +#include <assert.h> + +/* Forward declarations */ +static BtOps sqliteBtreeOps; +static BtCursorOps sqliteBtreeCursorOps; + +/* +** Macros used for byteswapping.  B is a pointer to the Btree +** structure.  This is needed to access the Btree.needSwab boolean +** in order to tell if byte swapping is needed or not. +** X is an unsigned integer.  SWAB16 byte swaps a 16-bit integer. +** SWAB32 byteswaps a 32-bit integer. +*/ +#define SWAB16(B,X)   ((B)->needSwab? swab16((u16)X) : ((u16)X)) +#define SWAB32(B,X)   ((B)->needSwab? swab32(X) : (X)) +#define SWAB_ADD(B,X,A) \ +   if((B)->needSwab){ X=swab32(swab32(X)+A); }else{ X += (A); } + +/* +** The following global variable - available only if STQLITE_TEST is +** defined - is used to determine whether new databases are created in +** native byte order or in non-native byte order.  Non-native byte order +** databases are created for testing purposes only.  Under normal operation, +** only native byte-order databases should be created, but we should be +** able to read or write existing databases regardless of the byteorder. +*/ +#ifdef STQLITE_TEST +int btree_native_byte_order = 1; +#else +# define btree_native_byte_order 1 +#endif + +/* +** Forward declarations of structures used only in this file. +*/ +typedef struct PageOne PageOne; +typedef struct MemPage MemPage; +typedef struct PageHdr PageHdr; +typedef struct Cell Cell; +typedef struct CellHdr CellHdr; +typedef struct FreeBlk FreeBlk; +typedef struct OverflowPage OverflowPage; +typedef struct FreelistInfo FreelistInfo; + +/* +** All structures on a database page are aligned to 4-byte boundries. +** This routine rounds up a number of bytes to the next multiple of 4. +** +** This might need to change for computer architectures that require +** and 8-byte tqalignment boundry for structures. +*/ +#define ROUNDUP(X)  ((X+3) & ~3) + +/* +** This is a magic string that appears at the beginning of every +** STQLite database in order to identify the file as a real database. +*/ +static const char zMagicHeader[] =  +   "** This file tqcontains an STQLite 2.1 database **"; +#define MAGIC_SIZE (sizeof(zMagicHeader)) + +/* +** This is a magic integer also used to test the integrity of the database +** file.  This integer is used in addition to the string above so that +** if the file is written on a little-endian architecture and read +** on a big-endian architectures (or vice versa) we can detect the +** problem. +** +** The number used was obtained at random and has no special +** significance other than the fact that it represents a different +** integer on little-endian and big-endian machines. +*/ +#define MAGIC 0xdae37528 + +/* +** The first page of the database file tqcontains a magic header string +** to identify the file as an STQLite database file.  It also tqcontains +** a pointer to the first free page of the file.  Page 2 tqcontains the +** root of the principle BTree.  The file might contain other BTrees +** rooted on pages above 2. +** +** The first page also tqcontains STQLITE_N_BTREE_META integers that +** can be used by higher-level routines. +** +** Remember that pages are numbered beginning with 1.  (See pager.c +** for additional information.)  Page 0 does not exist and a page +** number of 0 is used to mean "no such page". +*/ +struct PageOne { +  char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */ +  int iMagic;              /* Integer to verify correct byte order */ +  Pgno freeList;           /* First free page in a list of all free pages */ +  int nFree;               /* Number of pages on the free list */ +  int aMeta[STQLITE_N_BTREE_META-1];  /* User defined integers */ +}; + +/* +** Each database page has a header that is an instance of this +** structure. +** +** PageHdr.firstFree is 0 if there is no free space on this page. +** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a  +** FreeBlk structure that describes the first block of free space.   +** All free space is defined by a linked list of FreeBlk structures. +** +** Data is stored in a linked list of Cell structures.  PageHdr.firstCell +** is the index into MemPage.u.aDisk[] of the first cell on the page.  The +** Cells are kept in sorted order. +** +** A Cell tqcontains all information about a database entry and a pointer +** to a child page that tqcontains other entries less than itself.  In +** other words, the i-th Cell tqcontains both Ptr(i) and Key(i).  The +** right-most pointer of the page is contained in PageHdr.rightChild. +*/ +struct PageHdr { +  Pgno rightChild;  /* Child page that comes after all cells on this page */ +  u16 firstCell;    /* Index in MemPage.u.aDisk[] of the first cell */ +  u16 firstFree;    /* Index in MemPage.u.aDisk[] of the first free block */ +}; + +/* +** Entries on a page of the database are called "Cells".  Each Cell +** has a header and data.  This structure defines the header.  The +** key and data (collectively the "payload") follow this header on +** the database page. +** +** A definition of the complete Cell structure is given below.  The +** header for the cell must be defined first in order to do some +** of the sizing #defines that follow. +*/ +struct CellHdr { +  Pgno leftChild; /* Child page that comes before this cell */ +  u16 nKey;       /* Number of bytes in the key */ +  u16 iNext;      /* Index in MemPage.u.aDisk[] of next cell in sorted order */ +  u8 nKeyHi;      /* Upper 8 bits of key size for keys larger than 64K bytes */ +  u8 nDataHi;     /* Upper 8 bits of data size when the size is more than 64K */ +  u16 nData;      /* Number of bytes of data */ +}; + +/* +** The key and data size are split into a lower 16-bit segment and an +** upper 8-bit segment in order to pack them together into a smaller +** space.  The following macros reassembly a key or data size back +** into an integer. +*/ +#define NKEY(b,h)  (SWAB16(b,h.nKey) + h.nKeyHi*65536) +#define NDATA(b,h) (SWAB16(b,h.nData) + h.nDataHi*65536) + +/* +** The minimum size of a complete Cell.  The Cell must contain a header +** and at least 4 bytes of payload. +*/ +#define MIN_CELL_SIZE  (sizeof(CellHdr)+4) + +/* +** The maximum number of database entries that can be held in a single +** page of the database.  +*/ +#define MX_CELL ((STQLITE_USABLE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE) + +/* +** The amount of usable space on a single page of the BTree.  This is the +** page size minus the overhead of the page header. +*/ +#define USABLE_SPACE  (STQLITE_USABLE_SIZE - sizeof(PageHdr)) + +/* +** The maximum amount of payload (in bytes) that can be stored locally for +** a database entry.  If the entry tqcontains more data than this, the +** extra goes onto overflow pages. +** +** This number is chosen so that at least 4 cells will fit on every page. +*/ +#define MX_LOCAL_PAYLOAD ((USABLE_SPACE/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3) + +/* +** Data on a database page is stored as a linked list of Cell structures. +** Both the key and the data are stored in aPayload[].  The key always comes +** first.  The aPayload[] field grows as necessary to hold the key and data, +** up to a maximum of MX_LOCAL_PAYLOAD bytes.  If the size of the key and +** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the +** page number of the first overflow page. +** +** Though this structure is fixed in size, the Cell on the database +** page varies in size.  Every cell has a CellHdr and at least 4 bytes +** of payload space.  Additional payload bytes (up to the maximum of +** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as +** needed. +*/ +struct Cell { +  CellHdr h;                        /* The cell header */ +  char aPayload[MX_LOCAL_PAYLOAD];  /* Key and data */ +  Pgno ovfl;                        /* The first overflow page */ +}; + +/* +** Free space on a page is remembered using a linked list of the FreeBlk +** structures.  Space on a database page is allocated in increments of +** at least 4 bytes and is always aligned to a 4-byte boundry.  The +** linked list of FreeBlks is always kept in order by address. +*/ +struct FreeBlk { +  u16 iSize;      /* Number of bytes in this block of free space */ +  u16 iNext;      /* Index in MemPage.u.aDisk[] of the next free block */ +}; + +/* +** The number of bytes of payload that will fit on a single overflow page. +*/ +#define OVERFLOW_SIZE (STQLITE_USABLE_SIZE-sizeof(Pgno)) + +/* +** When the key and data for a single entry in the BTree will not fit in +** the MX_LOCAL_PAYLOAD bytes of space available on the database page, +** then all extra bytes are written to a linked list of overflow pages. +** Each overflow page is an instance of the following structure. +** +** Unused pages in the database are also represented by instances of +** the OverflowPage structure.  The PageOne.freeList field is the +** page number of the first page in a linked list of unused database +** pages. +*/ +struct OverflowPage { +  Pgno iNext; +  char aPayload[OVERFLOW_SIZE]; +}; + +/* +** The PageOne.freeList field points to a linked list of overflow pages +** hold information about free pages.  The aPayload section of each +** overflow page tqcontains an instance of the following structure.  The +** aFree[] array holds the page number of nFree unused pages in the disk +** file. +*/ +struct FreelistInfo { +  int nFree; +  Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)]; +}; + +/* +** For every page in the database file, an instance of the following structure +** is stored in memory.  The u.aDisk[] array tqcontains the raw bits read from +** the disk.  The rest is auxiliary information held in memory only. The +** auxiliary info is only valid for regular database pages - it is not +** used for overflow pages and pages on the freelist. +** +** Of particular interest in the auxiliary info is the apCell[] entry.  Each +** apCell[] entry is a pointer to a Cell structure in u.aDisk[].  The cells are +** put in this array so that they can be accessed in constant time, rather +** than in linear time which would be needed if we had to walk the linked  +** list on every access. +** +** Note that apCell[] tqcontains enough space to hold up to two more Cells +** than can possibly fit on one page.  In the steady state, every apCell[] +** points to memory inside u.aDisk[].  But in the middle of an insert +** operation, some apCell[] entries may temporarily point to data space +** outside of u.aDisk[].  This is a transient situation that is quickly +** resolved.  But while it is happening, it is possible for a database +** page to hold as many as two more cells than it might otherwise hold. +** The extra two entries in apCell[] are an allowance for this situation. +** +** The pParent field points back to the tqparent page.  This allows us to +** walk up the BTree from any leaf to the root.  Care must be taken to +** unref() the tqparent page pointer when this page is no longer referenced. +** The pageDestructor() routine handles that chore. +*/ +struct MemPage { +  union u_page_data { +    char aDisk[STQLITE_PAGE_SIZE];  /* Page data stored on disk */ +    PageHdr hdr;                   /* Overlay page header */ +  } u; +  u8 isInit;                     /* True if auxiliary data is initialized */ +  u8 idxShift;                   /* True if apCell[] indices have changed */ +  u8 isOverfull;                 /* Some apCell[] points outside u.aDisk[] */ +  MemPage *pParent;              /* The tqparent of this page.  NULL for root */ +  int idxParent;                 /* Index in pParent->apCell[] of this node */ +  int nFree;                     /* Number of free bytes in u.aDisk[] */ +  int nCell;                     /* Number of entries on this page */ +  Cell *apCell[MX_CELL+2];       /* All data entires in sorted order */ +}; + +/* +** The in-memory image of a disk page has the auxiliary information appended +** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold +** that extra information. +*/ +#define EXTRA_SIZE (sizeof(MemPage)-sizeof(union u_page_data)) + +/* +** Everything we need to know about an open database +*/ +struct Btree { +  BtOps *pOps;          /* Function table */ +  Pager *pPager;        /* The page cache */ +  BtCursor *pCursor;    /* A list of all open cursors */ +  PageOne *page1;       /* First page of the database */ +  u8 inTrans;           /* True if a transaction is in progress */ +  u8 inCkpt;            /* True if there is a checkpoint on the transaction */ +  u8 readOnly;          /* True if the underlying file is readonly */ +  u8 needSwab;          /* Need to byte-swapping */ +}; +typedef Btree Bt; + +/* +** A cursor is a pointer to a particular entry in the BTree. +** The entry is identified by its MemPage and the index in +** MemPage.apCell[] of the entry. +*/ +struct BtCursor { +  BtCursorOps *pOps;        /* Function table */ +  Btree *pBt;               /* The Btree to which this cursor belongs */ +  BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */ +  BtCursor *pShared;        /* Loop of cursors with the same root page */ +  Pgno pgnoRoot;            /* The root page of this tree */ +  MemPage *pPage;           /* Page that tqcontains the entry */ +  int idx;                  /* Index of the entry in pPage->apCell[] */ +  u8 wrFlag;                /* True if writable */ +  u8 eSkip;                 /* Determines if next step operation is a no-op */ +  u8 iMatch;                /* compare result from last sqliteBtreeMoveto() */ +}; + +/* +** Legal values for BtCursor.eSkip. +*/ +#define SKIP_NONE     0   /* Always step the cursor */ +#define SKIP_NEXT     1   /* The next sqliteBtreeNext() is a no-op */ +#define SKIP_PREV     2   /* The next sqliteBtreePrevious() is a no-op */ +#define SKIP_INVALID  3   /* Calls to Next() and Previous() are invalid */ + +/* Forward declarations */ +static int fileBtreeCloseCursor(BtCursor *pCur); + +/* +** Routines for byte swapping. +*/ +u16 swab16(u16 x){ +  return ((x & 0xff)<<8) | ((x>>8)&0xff); +} +u32 swab32(u32 x){ +  return ((x & 0xff)<<24) | ((x & 0xff00)<<8) | +         ((x>>8) & 0xff00) | ((x>>24)&0xff); +} + +/* +** Compute the total number of bytes that a Cell needs on the main +** database page.  The number returned includes the Cell header, +** local payload storage, and the pointer to overflow pages (if +** applicable).  Additional space allocated on overflow pages +** is NOT included in the value returned from this routine. +*/ +static int cellSize(Btree *pBt, Cell *pCell){ +  int n = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h); +  if( n>MX_LOCAL_PAYLOAD ){ +    n = MX_LOCAL_PAYLOAD + sizeof(Pgno); +  }else{ +    n = ROUNDUP(n); +  } +  n += sizeof(CellHdr); +  return n; +} + +/* +** Defragment the page given.  All Cells are moved to the +** beginning of the page and all free space is collected  +** into one big FreeBlk at the end of the page. +*/ +static void defragmentPage(Btree *pBt, MemPage *pPage){ +  int pc, i, n; +  FreeBlk *pFBlk; +  char newPage[STQLITE_USABLE_SIZE]; + +  assert( sqlitepager_iswriteable(pPage) ); +  assert( pPage->isInit ); +  pc = sizeof(PageHdr); +  pPage->u.hdr.firstCell = SWAB16(pBt, pc); +  memcpy(newPage, pPage->u.aDisk, pc); +  for(i=0; i<pPage->nCell; i++){ +    Cell *pCell = pPage->apCell[i]; + +    /* This routine should never be called on an overfull page.  The +    ** following asserts verify that constraint. */ +    assert( Addr(pCell) > Addr(pPage) ); +    assert( Addr(pCell) < Addr(pPage) + STQLITE_USABLE_SIZE ); + +    n = cellSize(pBt, pCell); +    pCell->h.iNext = SWAB16(pBt, pc + n); +    memcpy(&newPage[pc], pCell, n); +    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc]; +    pc += n; +  } +  assert( pPage->nFree==STQLITE_USABLE_SIZE-pc ); +  memcpy(pPage->u.aDisk, newPage, pc); +  if( pPage->nCell>0 ){ +    pPage->apCell[pPage->nCell-1]->h.iNext = 0; +  } +  pFBlk = (FreeBlk*)&pPage->u.aDisk[pc]; +  pFBlk->iSize = SWAB16(pBt, STQLITE_USABLE_SIZE - pc); +  pFBlk->iNext = 0; +  pPage->u.hdr.firstFree = SWAB16(pBt, pc); +  memset(&pFBlk[1], 0, STQLITE_USABLE_SIZE - pc - sizeof(FreeBlk)); +} + +/* +** Allocate nByte bytes of space on a page.  nByte must be a  +** multiple of 4. +** +** Return the index into pPage->u.aDisk[] of the first byte of +** the new allocation. Or return 0 if there is not enough free +** space on the page to satisfy the allocation request. +** +** If the page tqcontains nBytes of free space but does not contain +** nBytes of contiguous free space, then this routine automatically +** calls defragementPage() to consolidate all free space before  +** allocating the new chunk. +*/ +static int allocateSpace(Btree *pBt, MemPage *pPage, int nByte){ +  FreeBlk *p; +  u16 *pIdx; +  int start; +  int iSize; +#ifndef NDEBUG +  int cnt = 0; +#endif + +  assert( sqlitepager_iswriteable(pPage) ); +  assert( nByte==ROUNDUP(nByte) ); +  assert( pPage->isInit ); +  if( pPage->nFree<nByte || pPage->isOverfull ) return 0; +  pIdx = &pPage->u.hdr.firstFree; +  p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)]; +  while( (iSize = SWAB16(pBt, p->iSize))<nByte ){ +    assert( cnt++ < STQLITE_USABLE_SIZE/4 ); +    if( p->iNext==0 ){ +      defragmentPage(pBt, pPage); +      pIdx = &pPage->u.hdr.firstFree; +    }else{ +      pIdx = &p->iNext; +    } +    p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)]; +  } +  if( iSize==nByte ){ +    start = SWAB16(pBt, *pIdx); +    *pIdx = p->iNext; +  }else{ +    FreeBlk *pNew; +    start = SWAB16(pBt, *pIdx); +    pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte]; +    pNew->iNext = p->iNext; +    pNew->iSize = SWAB16(pBt, iSize - nByte); +    *pIdx = SWAB16(pBt, start + nByte); +  } +  pPage->nFree -= nByte; +  return start; +} + +/* +** Return a section of the MemPage.u.aDisk[] to the freelist. +** The first byte of the new free block is pPage->u.aDisk[start] +** and the size of the block is "size" bytes.  Size must be +** a multiple of 4. +** +** Most of the effort here is involved in coalesing adjacent +** free blocks into a single big free block. +*/ +static void freeSpace(Btree *pBt, MemPage *pPage, int start, int size){ +  int end = start + size; +  u16 *pIdx, idx; +  FreeBlk *pFBlk; +  FreeBlk *pNew; +  FreeBlk *pNext; +  int iSize; + +  assert( sqlitepager_iswriteable(pPage) ); +  assert( size == ROUNDUP(size) ); +  assert( start == ROUNDUP(start) ); +  assert( pPage->isInit ); +  pIdx = &pPage->u.hdr.firstFree; +  idx = SWAB16(pBt, *pIdx); +  while( idx!=0 && idx<start ){ +    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx]; +    iSize = SWAB16(pBt, pFBlk->iSize); +    if( idx + iSize == start ){ +      pFBlk->iSize = SWAB16(pBt, iSize + size); +      if( idx + iSize + size == SWAB16(pBt, pFBlk->iNext) ){ +        pNext = (FreeBlk*)&pPage->u.aDisk[idx + iSize + size]; +        if( pBt->needSwab ){ +          pFBlk->iSize = swab16((u16)swab16(pNext->iSize)+iSize+size); +        }else{ +          pFBlk->iSize += pNext->iSize; +        } +        pFBlk->iNext = pNext->iNext; +      } +      pPage->nFree += size; +      return; +    } +    pIdx = &pFBlk->iNext; +    idx = SWAB16(pBt, *pIdx); +  } +  pNew = (FreeBlk*)&pPage->u.aDisk[start]; +  if( idx != end ){ +    pNew->iSize = SWAB16(pBt, size); +    pNew->iNext = SWAB16(pBt, idx); +  }else{ +    pNext = (FreeBlk*)&pPage->u.aDisk[idx]; +    pNew->iSize = SWAB16(pBt, size + SWAB16(pBt, pNext->iSize)); +    pNew->iNext = pNext->iNext; +  } +  *pIdx = SWAB16(pBt, start); +  pPage->nFree += size; +} + +/* +** Initialize the auxiliary information for a disk block. +** +** The pParent parameter must be a pointer to the MemPage which +** is the tqparent of the page being initialized.  The root of the +** BTree (usually page 2) has no tqparent and so for that page,  +** pParent==NULL. +** +** Return STQLITE_OK on success.  If we see that the page does +** not contain a well-formed database page, then return  +** STQLITE_CORRUPT.  Note that a return of STQLITE_OK does not +** guarantee that the page is well-formed.  It only shows that +** we failed to detect any corruption. +*/ +static int initPage(Bt *pBt, MemPage *pPage, Pgno pgnoThis, MemPage *pParent){ +  int idx;           /* An index into pPage->u.aDisk[] */ +  Cell *pCell;       /* A pointer to a Cell in pPage->u.aDisk[] */ +  FreeBlk *pFBlk;    /* A pointer to a free block in pPage->u.aDisk[] */ +  int sz;            /* The size of a Cell in bytes */ +  int freeSpace;     /* Amount of free space on the page */ + +  if( pPage->pParent ){ +    assert( pPage->pParent==pParent ); +    return STQLITE_OK; +  } +  if( pParent ){ +    pPage->pParent = pParent; +    sqlitepager_ref(pParent); +  } +  if( pPage->isInit ) return STQLITE_OK; +  pPage->isInit = 1; +  pPage->nCell = 0; +  freeSpace = USABLE_SPACE; +  idx = SWAB16(pBt, pPage->u.hdr.firstCell); +  while( idx!=0 ){ +    if( idx>STQLITE_USABLE_SIZE-MIN_CELL_SIZE ) goto page_format_error; +    if( idx<sizeof(PageHdr) ) goto page_format_error; +    if( idx!=ROUNDUP(idx) ) goto page_format_error; +    pCell = (Cell*)&pPage->u.aDisk[idx]; +    sz = cellSize(pBt, pCell); +    if( idx+sz > STQLITE_USABLE_SIZE ) goto page_format_error; +    freeSpace -= sz; +    pPage->apCell[pPage->nCell++] = pCell; +    idx = SWAB16(pBt, pCell->h.iNext); +  } +  pPage->nFree = 0; +  idx = SWAB16(pBt, pPage->u.hdr.firstFree); +  while( idx!=0 ){ +    int iNext; +    if( idx>STQLITE_USABLE_SIZE-sizeof(FreeBlk) ) goto page_format_error; +    if( idx<sizeof(PageHdr) ) goto page_format_error; +    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx]; +    pPage->nFree += SWAB16(pBt, pFBlk->iSize); +    iNext = SWAB16(pBt, pFBlk->iNext); +    if( iNext>0 && iNext <= idx ) goto page_format_error; +    idx = iNext; +  } +  if( pPage->nCell==0 && pPage->nFree==0 ){ +    /* As a special case, an uninitialized root page appears to be +    ** an empty database */ +    return STQLITE_OK; +  } +  if( pPage->nFree!=freeSpace ) goto page_format_error; +  return STQLITE_OK; + +page_format_error: +  return STQLITE_CORRUPT; +} + +/* +** Set up a raw page so that it looks like a database page holding +** no entries. +*/ +static void zeroPage(Btree *pBt, MemPage *pPage){ +  PageHdr *pHdr; +  FreeBlk *pFBlk; +  assert( sqlitepager_iswriteable(pPage) ); +  memset(pPage, 0, STQLITE_USABLE_SIZE); +  pHdr = &pPage->u.hdr; +  pHdr->firstCell = 0; +  pHdr->firstFree = SWAB16(pBt, sizeof(*pHdr)); +  pFBlk = (FreeBlk*)&pHdr[1]; +  pFBlk->iNext = 0; +  pPage->nFree = STQLITE_USABLE_SIZE - sizeof(*pHdr); +  pFBlk->iSize = SWAB16(pBt, pPage->nFree); +  pPage->nCell = 0; +  pPage->isOverfull = 0; +} + +/* +** This routine is called when the reference count for a page +** reaches zero.  We need to unref the pParent pointer when that +** happens. +*/ +static void pageDestructor(void *pData){ +  MemPage *pPage = (MemPage*)pData; +  if( pPage->pParent ){ +    MemPage *pParent = pPage->pParent; +    pPage->pParent = 0; +    sqlitepager_unref(pParent); +  } +} + +/* +** Open a new database. +** +** Actually, this routine just sets up the internal data structures +** for accessing the database.  We do not open the database file  +** until the first page is loaded. +** +** zFilename is the name of the database file.  If zFilename is NULL +** a new database with a random name is created.  This randomly named +** database file will be deleted when sqliteBtreeClose() is called. +*/ +int sqliteBtreeOpen( +  const char *zFilename,    /* Name of the file containing the BTree database */ +  int omitJournal,          /* if TRUE then do not journal this file */ +  int nCache,               /* How many pages in the page cache */ +  Btree **ppBtree           /* Pointer to new Btree object written here */ +){ +  Btree *pBt; +  int rc; + +  /* +  ** The following asserts make sure that structures used by the btree are +  ** the right size.  This is to guard against size changes that result +  ** when compiling on a different architecture. +  */ +  assert( sizeof(u32)==4 ); +  assert( sizeof(u16)==2 ); +  assert( sizeof(Pgno)==4 ); +  assert( sizeof(PageHdr)==8 ); +  assert( sizeof(CellHdr)==12 ); +  assert( sizeof(FreeBlk)==4 ); +  assert( sizeof(OverflowPage)==STQLITE_USABLE_SIZE ); +  assert( sizeof(FreelistInfo)==OVERFLOW_SIZE ); +  assert( sizeof(ptr)==sizeof(char*) ); +  assert( sizeof(uptr)==sizeof(ptr) ); + +  pBt = sqliteMalloc( sizeof(*pBt) ); +  if( pBt==0 ){ +    *ppBtree = 0; +    return STQLITE_NOMEM; +  } +  if( nCache<10 ) nCache = 10; +  rc = sqlitepager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE, +                        !omitJournal); +  if( rc!=STQLITE_OK ){ +    if( pBt->pPager ) sqlitepager_close(pBt->pPager); +    sqliteFree(pBt); +    *ppBtree = 0; +    return rc; +  } +  sqlitepager_set_destructor(pBt->pPager, pageDestructor); +  pBt->pCursor = 0; +  pBt->page1 = 0; +  pBt->readOnly = sqlitepager_isreadonly(pBt->pPager); +  pBt->pOps = &sqliteBtreeOps; +  *ppBtree = pBt; +  return STQLITE_OK; +} + +/* +** Close an open database and tqinvalidate all cursors. +*/ +static int fileBtreeClose(Btree *pBt){ +  while( pBt->pCursor ){ +    fileBtreeCloseCursor(pBt->pCursor); +  } +  sqlitepager_close(pBt->pPager); +  sqliteFree(pBt); +  return STQLITE_OK; +} + +/* +** Change the limit on the number of pages allowed in the cache. +** +** The maximum number of cache pages is set to the absolute +** value of mxPage.  If mxPage is negative, the pager will +** operate asynchronously - it will not stop to do fsync()s +** to insure data is written to the disk surface before +** continuing.  Transactions still work if synchronous is off, +** and the database cannot be corrupted if this program +** crashes.  But if the operating system crashes or there is +** an abrupt power failure when synchronous is off, the database +** could be left in an inconsistent and unrecoverable state. +** Synchronous is on by default so database corruption is not +** normally a worry. +*/ +static int fileBtreeSetCacheSize(Btree *pBt, int mxPage){ +  sqlitepager_set_cachesize(pBt->pPager, mxPage); +  return STQLITE_OK; +} + +/* +** Change the way data is synced to disk in order to increase or decrease +** how well the database resists damage due to OS crashes and power +** failures.  Level 1 is the same as asynchronous (no syncs() occur and +** there is a high probability of damage)  Level 2 is the default.  There +** is a very low but non-zero probability of damage.  Level 3 reduces the +** probability of damage to near zero but with a write performance reduction. +*/ +static int fileBtreeSetSafetyLevel(Btree *pBt, int level){ +  sqlitepager_set_safety_level(pBt->pPager, level); +  return STQLITE_OK; +} + +/* +** Get a reference to page1 of the database file.  This will +** also acquire a readlock on that file. +** +** STQLITE_OK is returned on success.  If the file is not a +** well-formed database file, then STQLITE_CORRUPT is returned. +** STQLITE_BUSY is returned if the database is locked.  STQLITE_NOMEM +** is returned if we run out of memory.  STQLITE_PROTOCOL is returned +** if there is a locking protocol violation. +*/ +static int lockBtree(Btree *pBt){ +  int rc; +  if( pBt->page1 ) return STQLITE_OK; +  rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1); +  if( rc!=STQLITE_OK ) return rc; + +  /* Do some checking to help insure the file we opened really is +  ** a valid database file.  +  */ +  if( sqlitepager_pagecount(pBt->pPager)>0 ){ +    PageOne *pP1 = pBt->page1; +    if( strcmp(pP1->zMagic,zMagicHeader)!=0 || +          (pP1->iMagic!=MAGIC && swab32(pP1->iMagic)!=MAGIC) ){ +      rc = STQLITE_NOTADB; +      goto page1_init_failed; +    } +    pBt->needSwab = pP1->iMagic!=MAGIC; +  } +  return rc; + +page1_init_failed: +  sqlitepager_unref(pBt->page1); +  pBt->page1 = 0; +  return rc; +} + +/* +** If there are no outstanding cursors and we are not in the middle +** of a transaction but there is a read lock on the database, then +** this routine unrefs the first page of the database file which  +** has the effect of releasing the read lock. +** +** If there are any outstanding cursors, this routine is a no-op. +** +** If there is a transaction in progress, this routine is a no-op. +*/ +static void unlockBtreeIfUnused(Btree *pBt){ +  if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){ +    sqlitepager_unref(pBt->page1); +    pBt->page1 = 0; +    pBt->inTrans = 0; +    pBt->inCkpt = 0; +  } +} + +/* +** Create a new database by initializing the first two pages of the +** file. +*/ +static int newDatabase(Btree *pBt){ +  MemPage *pRoot; +  PageOne *pP1; +  int rc; +  if( sqlitepager_pagecount(pBt->pPager)>1 ) return STQLITE_OK; +  pP1 = pBt->page1; +  rc = sqlitepager_write(pBt->page1); +  if( rc ) return rc; +  rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot); +  if( rc ) return rc; +  rc = sqlitepager_write(pRoot); +  if( rc ){ +    sqlitepager_unref(pRoot); +    return rc; +  } +  strcpy(pP1->zMagic, zMagicHeader); +  if( btree_native_byte_order ){ +    pP1->iMagic = MAGIC; +    pBt->needSwab = 0; +  }else{ +    pP1->iMagic = swab32(MAGIC); +    pBt->needSwab = 1; +  } +  zeroPage(pBt, pRoot); +  sqlitepager_unref(pRoot); +  return STQLITE_OK; +} + +/* +** Attempt to start a new transaction. +** +** A transaction must be started before attempting any changes +** to the database.  None of the following routines will work +** unless a transaction is started first: +** +**      sqliteBtreeCreateTable() +**      sqliteBtreeCreateIndex() +**      sqliteBtreeClearTable() +**      sqliteBtreeDropTable() +**      sqliteBtreeInsert() +**      sqliteBtreeDelete() +**      sqliteBtreeUpdateMeta() +*/ +static int fileBtreeBeginTrans(Btree *pBt){ +  int rc; +  if( pBt->inTrans ) return STQLITE_ERROR; +  if( pBt->readOnly ) return STQLITE_READONLY; +  if( pBt->page1==0 ){ +    rc = lockBtree(pBt); +    if( rc!=STQLITE_OK ){ +      return rc; +    } +  } +  rc = sqlitepager_begin(pBt->page1); +  if( rc==STQLITE_OK ){ +    rc = newDatabase(pBt); +  } +  if( rc==STQLITE_OK ){ +    pBt->inTrans = 1; +    pBt->inCkpt = 0; +  }else{ +    unlockBtreeIfUnused(pBt); +  } +  return rc; +} + +/* +** Commit the transaction currently in progress. +** +** This will release the write lock on the database file.  If there +** are no active cursors, it also releases the read lock. +*/ +static int fileBtreeCommit(Btree *pBt){ +  int rc; +  rc = pBt->readOnly ? STQLITE_OK : sqlitepager_commit(pBt->pPager); +  pBt->inTrans = 0; +  pBt->inCkpt = 0; +  unlockBtreeIfUnused(pBt); +  return rc; +} + +/* +** Rollback the transaction in progress.  All cursors will be +** invalided by this operation.  Any attempt to use a cursor +** that was open at the beginning of this operation will result +** in an error. +** +** This will release the write lock on the database file.  If there +** are no active cursors, it also releases the read lock. +*/ +static int fileBtreeRollback(Btree *pBt){ +  int rc; +  BtCursor *pCur; +  if( pBt->inTrans==0 ) return STQLITE_OK; +  pBt->inTrans = 0; +  pBt->inCkpt = 0; +  rc = pBt->readOnly ? STQLITE_OK : sqlitepager_rollback(pBt->pPager); +  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ +    if( pCur->pPage && pCur->pPage->isInit==0 ){ +      sqlitepager_unref(pCur->pPage); +      pCur->pPage = 0; +    } +  } +  unlockBtreeIfUnused(pBt); +  return rc; +} + +/* +** Set the checkpoint for the current transaction.  The checkpoint serves +** as a sub-transaction that can be rolled back independently of the +** main transaction.  You must start a transaction before starting a +** checkpoint.  The checkpoint is ended automatically if the transaction +** commits or rolls back. +** +** Only one checkpoint may be active at a time.  It is an error to try +** to start a new checkpoint if another checkpoint is already active. +*/ +static int fileBtreeBeginCkpt(Btree *pBt){ +  int rc; +  if( !pBt->inTrans || pBt->inCkpt ){ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  rc = pBt->readOnly ? STQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager); +  pBt->inCkpt = 1; +  return rc; +} + + +/* +** Commit a checkpoint to transaction currently in progress.  If no +** checkpoint is active, this is a no-op. +*/ +static int fileBtreeCommitCkpt(Btree *pBt){ +  int rc; +  if( pBt->inCkpt && !pBt->readOnly ){ +    rc = sqlitepager_ckpt_commit(pBt->pPager); +  }else{ +    rc = STQLITE_OK; +  } +  pBt->inCkpt = 0; +  return rc; +} + +/* +** Rollback the checkpoint to the current transaction.  If there +** is no active checkpoint or transaction, this routine is a no-op. +** +** All cursors will be invalided by this operation.  Any attempt +** to use a cursor that was open at the beginning of this operation +** will result in an error. +*/ +static int fileBtreeRollbackCkpt(Btree *pBt){ +  int rc; +  BtCursor *pCur; +  if( pBt->inCkpt==0 || pBt->readOnly ) return STQLITE_OK; +  rc = sqlitepager_ckpt_rollback(pBt->pPager); +  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ +    if( pCur->pPage && pCur->pPage->isInit==0 ){ +      sqlitepager_unref(pCur->pPage); +      pCur->pPage = 0; +    } +  } +  pBt->inCkpt = 0; +  return rc; +} + +/* +** Create a new cursor for the BTree whose root is on the page +** iTable.  The act of acquiring a cursor gets a read lock on  +** the database file. +** +** If wrFlag==0, then the cursor can only be used for reading. +** If wrFlag==1, then the cursor can be used for reading or for +** writing if other conditions for writing are also met.  These +** are the conditions that must be met in order for writing to +** be allowed: +** +** 1:  The cursor must have been opened with wrFlag==1 +** +** 2:  No other cursors may be open with wrFlag==0 on the same table +** +** 3:  The database must be writable (not on read-only media) +** +** 4:  There must be an active transaction. +** +** Condition 2 warrants further discussion.  If any cursor is opened +** on a table with wrFlag==0, that prevents all other cursors from +** writing to that table.  This is a kind of "read-lock".  When a cursor +** is opened with wrFlag==0 it is guaranteed that the table will not +** change as long as the cursor is open.  This allows the cursor to +** do a sequential scan of the table without having to worry about +** entries being inserted or deleted during the scan.  Cursors should +** be opened with wrFlag==0 only if this read-lock property is needed. +** That is to say, cursors should be opened with wrFlag==0 only if they +** intend to use the sqliteBtreeNext() system call.  All other cursors +** should be opened with wrFlag==1 even if they never really intend +** to write. +**  +** No checking is done to make sure that page iTable really is the +** root page of a b-tree.  If it is not, then the cursor acquired +** will not work correctly. +*/ +static int fileBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){ +  int rc; +  BtCursor *pCur, *pRing; + +  if( pBt->page1==0 ){ +    rc = lockBtree(pBt); +    if( rc!=STQLITE_OK ){ +      *ppCur = 0; +      return rc; +    } +  } +  pCur = sqliteMalloc( sizeof(*pCur) ); +  if( pCur==0 ){ +    rc = STQLITE_NOMEM; +    goto create_cursor_exception; +  } +  pCur->pgnoRoot = (Pgno)iTable; +  rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage); +  if( rc!=STQLITE_OK ){ +    goto create_cursor_exception; +  } +  rc = initPage(pBt, pCur->pPage, pCur->pgnoRoot, 0); +  if( rc!=STQLITE_OK ){ +    goto create_cursor_exception; +  } +  pCur->pOps = &sqliteBtreeCursorOps; +  pCur->pBt = pBt; +  pCur->wrFlag = wrFlag; +  pCur->idx = 0; +  pCur->eSkip = SKIP_INVALID; +  pCur->pNext = pBt->pCursor; +  if( pCur->pNext ){ +    pCur->pNext->pPrev = pCur; +  } +  pCur->pPrev = 0; +  pRing = pBt->pCursor; +  while( pRing && pRing->pgnoRoot!=pCur->pgnoRoot ){ pRing = pRing->pNext; } +  if( pRing ){ +    pCur->pShared = pRing->pShared; +    pRing->pShared = pCur; +  }else{ +    pCur->pShared = pCur; +  } +  pBt->pCursor = pCur; +  *ppCur = pCur; +  return STQLITE_OK; + +create_cursor_exception: +  *ppCur = 0; +  if( pCur ){ +    if( pCur->pPage ) sqlitepager_unref(pCur->pPage); +    sqliteFree(pCur); +  } +  unlockBtreeIfUnused(pBt); +  return rc; +} + +/* +** Close a cursor.  The read lock on the database file is released +** when the last cursor is closed. +*/ +static int fileBtreeCloseCursor(BtCursor *pCur){ +  Btree *pBt = pCur->pBt; +  if( pCur->pPrev ){ +    pCur->pPrev->pNext = pCur->pNext; +  }else{ +    pBt->pCursor = pCur->pNext; +  } +  if( pCur->pNext ){ +    pCur->pNext->pPrev = pCur->pPrev; +  } +  if( pCur->pPage ){ +    sqlitepager_unref(pCur->pPage); +  } +  if( pCur->pShared!=pCur ){ +    BtCursor *pRing = pCur->pShared; +    while( pRing->pShared!=pCur ){ pRing = pRing->pShared; } +    pRing->pShared = pCur->pShared; +  } +  unlockBtreeIfUnused(pBt); +  sqliteFree(pCur); +  return STQLITE_OK; +} + +/* +** Make a temporary cursor by filling in the fields of pTempCur. +** The temporary cursor is not on the cursor list for the Btree. +*/ +static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){ +  memcpy(pTempCur, pCur, sizeof(*pCur)); +  pTempCur->pNext = 0; +  pTempCur->pPrev = 0; +  if( pTempCur->pPage ){ +    sqlitepager_ref(pTempCur->pPage); +  } +} + +/* +** Delete a temporary cursor such as was made by the CreateTemporaryCursor() +** function above. +*/ +static void releaseTempCursor(BtCursor *pCur){ +  if( pCur->pPage ){ +    sqlitepager_unref(pCur->pPage); +  } +} + +/* +** Set *pSize to the number of bytes of key in the entry the +** cursor currently points to.  Always return STQLITE_OK. +** Failure is not possible.  If the cursor is not currently +** pointing to an entry (which can happen, for example, if +** the database is empty) then *pSize is set to 0. +*/ +static int fileBtreeKeySize(BtCursor *pCur, int *pSize){ +  Cell *pCell; +  MemPage *pPage; + +  pPage = pCur->pPage; +  assert( pPage!=0 ); +  if( pCur->idx >= pPage->nCell ){ +    *pSize = 0; +  }else{ +    pCell = pPage->apCell[pCur->idx]; +    *pSize = NKEY(pCur->pBt, pCell->h); +  } +  return STQLITE_OK; +} + +/* +** Read payload information from the entry that the pCur cursor is +** pointing to.  Begin reading the payload at "offset" and read +** a total of "amt" bytes.  Put the result in zBuf. +** +** This routine does not make a distinction between key and data. +** It just reads bytes from the payload area. +*/ +static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){ +  char *aPayload; +  Pgno nextPage; +  int rc; +  Btree *pBt = pCur->pBt; +  assert( pCur!=0 && pCur->pPage!=0 ); +  assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell ); +  aPayload = pCur->pPage->apCell[pCur->idx]->aPayload; +  if( offset<MX_LOCAL_PAYLOAD ){ +    int a = amt; +    if( a+offset>MX_LOCAL_PAYLOAD ){ +      a = MX_LOCAL_PAYLOAD - offset; +    } +    memcpy(zBuf, &aPayload[offset], a); +    if( a==amt ){ +      return STQLITE_OK; +    } +    offset = 0; +    zBuf += a; +    amt -= a; +  }else{ +    offset -= MX_LOCAL_PAYLOAD; +  } +  if( amt>0 ){ +    nextPage = SWAB32(pBt, pCur->pPage->apCell[pCur->idx]->ovfl); +  } +  while( amt>0 && nextPage ){ +    OverflowPage *pOvfl; +    rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl); +    if( rc!=0 ){ +      return rc; +    } +    nextPage = SWAB32(pBt, pOvfl->iNext); +    if( offset<OVERFLOW_SIZE ){ +      int a = amt; +      if( a + offset > OVERFLOW_SIZE ){ +        a = OVERFLOW_SIZE - offset; +      } +      memcpy(zBuf, &pOvfl->aPayload[offset], a); +      offset = 0; +      amt -= a; +      zBuf += a; +    }else{ +      offset -= OVERFLOW_SIZE; +    } +    sqlitepager_unref(pOvfl); +  } +  if( amt>0 ){ +    return STQLITE_CORRUPT; +  } +  return STQLITE_OK; +} + +/* +** Read part of the key associated with cursor pCur.  A maximum +** of "amt" bytes will be transfered into zBuf[].  The transfer +** begins at "offset".  The number of bytes actually read is +** returned.  +** +** Change:  It used to be that the amount returned will be smaller +** than the amount requested if there are not enough bytes in the key +** to satisfy the request.  But now, it must be the case that there +** is enough data available to satisfy the request.  If not, an exception +** is raised.  The change was made in an effort to boost performance +** by eliminating unneeded tests. +*/ +static int fileBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){ +  MemPage *pPage; + +  assert( amt>=0 ); +  assert( offset>=0 ); +  assert( pCur->pPage!=0 ); +  pPage = pCur->pPage; +  if( pCur->idx >= pPage->nCell ){ +    return 0; +  } +  assert( amt+offset <= NKEY(pCur->pBt, pPage->apCell[pCur->idx]->h) ); +  getPayload(pCur, offset, amt, zBuf); +  return amt; +} + +/* +** Set *pSize to the number of bytes of data in the entry the +** cursor currently points to.  Always return STQLITE_OK. +** Failure is not possible.  If the cursor is not currently +** pointing to an entry (which can happen, for example, if +** the database is empty) then *pSize is set to 0. +*/ +static int fileBtreeDataSize(BtCursor *pCur, int *pSize){ +  Cell *pCell; +  MemPage *pPage; + +  pPage = pCur->pPage; +  assert( pPage!=0 ); +  if( pCur->idx >= pPage->nCell ){ +    *pSize = 0; +  }else{ +    pCell = pPage->apCell[pCur->idx]; +    *pSize = NDATA(pCur->pBt, pCell->h); +  } +  return STQLITE_OK; +} + +/* +** Read part of the data associated with cursor pCur.  A maximum +** of "amt" bytes will be transfered into zBuf[].  The transfer +** begins at "offset".  The number of bytes actually read is +** returned.  The amount returned will be smaller than the +** amount requested if there are not enough bytes in the data +** to satisfy the request. +*/ +static int fileBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){ +  Cell *pCell; +  MemPage *pPage; + +  assert( amt>=0 ); +  assert( offset>=0 ); +  assert( pCur->pPage!=0 ); +  pPage = pCur->pPage; +  if( pCur->idx >= pPage->nCell ){ +    return 0; +  } +  pCell = pPage->apCell[pCur->idx]; +  assert( amt+offset <= NDATA(pCur->pBt, pCell->h) ); +  getPayload(pCur, offset + NKEY(pCur->pBt, pCell->h), amt, zBuf); +  return amt; +} + +/* +** Compare an external key against the key on the entry that pCur points to. +** +** The external key is pKey and is nKey bytes long.  The last nIgnore bytes +** of the key associated with pCur are ignored, as if they do not exist. +** (The normal case is for nIgnore to be zero in which case the entire +** internal key is used in the comparison.) +** +** The comparison result is written to *pRes as follows: +** +**    *pRes<0    This means pCur<pKey +** +**    *pRes==0   This means pCur==pKey for all nKey bytes +** +**    *pRes>0    This means pCur>pKey +** +** When one key is an exact prefix of the other, the shorter key is +** considered less than the longer one.  In order to be equal the +** keys must be exactly the same length. (The length of the pCur key +** is the actual key length minus nIgnore bytes.) +*/ +static int fileBtreeKeyCompare( +  BtCursor *pCur,       /* Pointer to entry to compare against */ +  const void *pKey,     /* Key to compare against entry that pCur points to */ +  int nKey,             /* Number of bytes in pKey */ +  int nIgnore,          /* Ignore this many bytes at the end of pCur */ +  int *pResult          /* Write the result here */ +){ +  Pgno nextPage; +  int n, c, rc, nLocal; +  Cell *pCell; +  Btree *pBt = pCur->pBt; +  const char *zKey  = (const char*)pKey; + +  assert( pCur->pPage ); +  assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell ); +  pCell = pCur->pPage->apCell[pCur->idx]; +  nLocal = NKEY(pBt, pCell->h) - nIgnore; +  if( nLocal<0 ) nLocal = 0; +  n = nKey<nLocal ? nKey : nLocal; +  if( n>MX_LOCAL_PAYLOAD ){ +    n = MX_LOCAL_PAYLOAD; +  } +  c = memcmp(pCell->aPayload, zKey, n); +  if( c!=0 ){ +    *pResult = c; +    return STQLITE_OK; +  } +  zKey += n; +  nKey -= n; +  nLocal -= n; +  nextPage = SWAB32(pBt, pCell->ovfl); +  while( nKey>0 && nLocal>0 ){ +    OverflowPage *pOvfl; +    if( nextPage==0 ){ +      return STQLITE_CORRUPT; +    } +    rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl); +    if( rc ){ +      return rc; +    } +    nextPage = SWAB32(pBt, pOvfl->iNext); +    n = nKey<nLocal ? nKey : nLocal; +    if( n>OVERFLOW_SIZE ){ +      n = OVERFLOW_SIZE; +    } +    c = memcmp(pOvfl->aPayload, zKey, n); +    sqlitepager_unref(pOvfl); +    if( c!=0 ){ +      *pResult = c; +      return STQLITE_OK; +    } +    nKey -= n; +    nLocal -= n; +    zKey += n; +  } +  if( c==0 ){ +    c = nLocal - nKey; +  } +  *pResult = c; +  return STQLITE_OK; +} + +/* +** Move the cursor down to a new child page.  The newPgno argument is the +** page number of the child page in the byte order of the disk image. +*/ +static int moveToChild(BtCursor *pCur, int newPgno){ +  int rc; +  MemPage *pNewPage; +  Btree *pBt = pCur->pBt; + +  newPgno = SWAB32(pBt, newPgno); +  rc = sqlitepager_get(pBt->pPager, newPgno, (void**)&pNewPage); +  if( rc ) return rc; +  rc = initPage(pBt, pNewPage, newPgno, pCur->pPage); +  if( rc ) return rc; +  assert( pCur->idx>=pCur->pPage->nCell +          || pCur->pPage->apCell[pCur->idx]->h.leftChild==SWAB32(pBt,newPgno) ); +  assert( pCur->idx<pCur->pPage->nCell +          || pCur->pPage->u.hdr.rightChild==SWAB32(pBt,newPgno) ); +  pNewPage->idxParent = pCur->idx; +  pCur->pPage->idxShift = 0; +  sqlitepager_unref(pCur->pPage); +  pCur->pPage = pNewPage; +  pCur->idx = 0; +  if( pNewPage->nCell<1 ){ +    return STQLITE_CORRUPT; +  } +  return STQLITE_OK; +} + +/* +** Move the cursor up to the tqparent page. +** +** pCur->idx is set to the cell index that tqcontains the pointer +** to the page we are coming from.  If we are coming from the +** right-most child page then pCur->idx is set to one more than +** the largest cell index. +*/ +static void moveToParent(BtCursor *pCur){ +  Pgno oldPgno; +  MemPage *pParent; +  MemPage *pPage; +  int idxParent; +  pPage = pCur->pPage; +  assert( pPage!=0 ); +  pParent = pPage->pParent; +  assert( pParent!=0 ); +  idxParent = pPage->idxParent; +  sqlitepager_ref(pParent); +  sqlitepager_unref(pPage); +  pCur->pPage = pParent; +  assert( pParent->idxShift==0 ); +  if( pParent->idxShift==0 ){ +    pCur->idx = idxParent; +#ifndef NDEBUG   +    /* Verify that pCur->idx is the correct index to point back to the child +    ** page we just came from  +    */ +    oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage)); +    if( pCur->idx<pParent->nCell ){ +      assert( pParent->apCell[idxParent]->h.leftChild==oldPgno ); +    }else{ +      assert( pParent->u.hdr.rightChild==oldPgno ); +    } +#endif +  }else{ +    /* The MemPage.idxShift flag indicates that cell indices might have  +    ** changed since idxParent was set and hence idxParent might be out +    ** of date.  So recompute the tqparent cell index by scanning all cells +    ** and locating the one that points to the child we just came from. +    */ +    int i; +    pCur->idx = pParent->nCell; +    oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage)); +    for(i=0; i<pParent->nCell; i++){ +      if( pParent->apCell[i]->h.leftChild==oldPgno ){ +        pCur->idx = i; +        break; +      } +    } +  } +} + +/* +** Move the cursor to the root page +*/ +static int moveToRoot(BtCursor *pCur){ +  MemPage *pNew; +  int rc; +  Btree *pBt = pCur->pBt; + +  rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pNew); +  if( rc ) return rc; +  rc = initPage(pBt, pNew, pCur->pgnoRoot, 0); +  if( rc ) return rc; +  sqlitepager_unref(pCur->pPage); +  pCur->pPage = pNew; +  pCur->idx = 0; +  return STQLITE_OK; +} + +/* +** Move the cursor down to the left-most leaf entry beneath the +** entry to which it is currently pointing. +*/ +static int moveToLeftmost(BtCursor *pCur){ +  Pgno pgno; +  int rc; + +  while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){ +    rc = moveToChild(pCur, pgno); +    if( rc ) return rc; +  } +  return STQLITE_OK; +} + +/* +** Move the cursor down to the right-most leaf entry beneath the +** page to which it is currently pointing.  Notice the difference +** between moveToLeftmost() and moveToRightmost().  moveToLeftmost() +** tqfinds the left-most entry beneath the *entry* whereas moveToRightmost() +** tqfinds the right-most entry beneath the *page*. +*/ +static int moveToRightmost(BtCursor *pCur){ +  Pgno pgno; +  int rc; + +  while( (pgno = pCur->pPage->u.hdr.rightChild)!=0 ){ +    pCur->idx = pCur->pPage->nCell; +    rc = moveToChild(pCur, pgno); +    if( rc ) return rc; +  } +  pCur->idx = pCur->pPage->nCell - 1; +  return STQLITE_OK; +} + +/* Move the cursor to the first entry in the table.  Return STQLITE_OK +** on success.  Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +static int fileBtreeFirst(BtCursor *pCur, int *pRes){ +  int rc; +  if( pCur->pPage==0 ) return STQLITE_ABORT; +  rc = moveToRoot(pCur); +  if( rc ) return rc; +  if( pCur->pPage->nCell==0 ){ +    *pRes = 1; +    return STQLITE_OK; +  } +  *pRes = 0; +  rc = moveToLeftmost(pCur); +  pCur->eSkip = SKIP_NONE; +  return rc; +} + +/* Move the cursor to the last entry in the table.  Return STQLITE_OK +** on success.  Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +static int fileBtreeLast(BtCursor *pCur, int *pRes){ +  int rc; +  if( pCur->pPage==0 ) return STQLITE_ABORT; +  rc = moveToRoot(pCur); +  if( rc ) return rc; +  assert( pCur->pPage->isInit ); +  if( pCur->pPage->nCell==0 ){ +    *pRes = 1; +    return STQLITE_OK; +  } +  *pRes = 0; +  rc = moveToRightmost(pCur); +  pCur->eSkip = SKIP_NONE; +  return rc; +} + +/* Move the cursor so that it points to an entry near pKey. +** Return a success code. +** +** If an exact match is not found, then the cursor is always +** left pointing at a leaf page which would hold the entry if it +** were present.  The cursor might point to an entry that comes +** before or after the key. +** +** The result of comparing the key with the entry to which the +** cursor is left pointing is stored in pCur->iMatch.  The same +** value is also written to *pRes if pRes!=NULL.  The meaning of +** this value is as follows: +** +**     *pRes<0      The cursor is left pointing at an entry that +**                  is smaller than pKey or if the table is empty +**                  and the cursor is therefore left point to nothing. +** +**     *pRes==0     The cursor is left pointing at an entry that +**                  exactly matches pKey. +** +**     *pRes>0      The cursor is left pointing at an entry that +**                  is larger than pKey. +*/ +static +int fileBtreeMoveto(BtCursor *pCur, const void *pKey, int nKey, int *pRes){ +  int rc; +  if( pCur->pPage==0 ) return STQLITE_ABORT; +  pCur->eSkip = SKIP_NONE; +  rc = moveToRoot(pCur); +  if( rc ) return rc; +  for(;;){ +    int lwr, upr; +    Pgno chldPg; +    MemPage *pPage = pCur->pPage; +    int c = -1;  /* pRes return if table is empty must be -1 */ +    lwr = 0; +    upr = pPage->nCell-1; +    while( lwr<=upr ){ +      pCur->idx = (lwr+upr)/2; +      rc = fileBtreeKeyCompare(pCur, pKey, nKey, 0, &c); +      if( rc ) return rc; +      if( c==0 ){ +        pCur->iMatch = c; +        if( pRes ) *pRes = 0; +        return STQLITE_OK; +      } +      if( c<0 ){ +        lwr = pCur->idx+1; +      }else{ +        upr = pCur->idx-1; +      } +    } +    assert( lwr==upr+1 ); +    assert( pPage->isInit ); +    if( lwr>=pPage->nCell ){ +      chldPg = pPage->u.hdr.rightChild; +    }else{ +      chldPg = pPage->apCell[lwr]->h.leftChild; +    } +    if( chldPg==0 ){ +      pCur->iMatch = c; +      if( pRes ) *pRes = c; +      return STQLITE_OK; +    } +    pCur->idx = lwr; +    rc = moveToChild(pCur, chldPg); +    if( rc ) return rc; +  } +  /* NOT REACHED */ +} + +/* +** Advance the cursor to the next entry in the database.  If +** successful then set *pRes=0.  If the cursor +** was already pointing to the last entry in the database before +** this routine was called, then set *pRes=1. +*/ +static int fileBtreeNext(BtCursor *pCur, int *pRes){ +  int rc; +  MemPage *pPage = pCur->pPage; +  assert( pRes!=0 ); +  if( pPage==0 ){ +    *pRes = 1; +    return STQLITE_ABORT; +  } +  assert( pPage->isInit ); +  assert( pCur->eSkip!=SKIP_INVALID ); +  if( pPage->nCell==0 ){ +    *pRes = 1; +    return STQLITE_OK; +  } +  assert( pCur->idx<pPage->nCell ); +  if( pCur->eSkip==SKIP_NEXT ){ +    pCur->eSkip = SKIP_NONE; +    *pRes = 0; +    return STQLITE_OK; +  } +  pCur->eSkip = SKIP_NONE; +  pCur->idx++; +  if( pCur->idx>=pPage->nCell ){ +    if( pPage->u.hdr.rightChild ){ +      rc = moveToChild(pCur, pPage->u.hdr.rightChild); +      if( rc ) return rc; +      rc = moveToLeftmost(pCur); +      *pRes = 0; +      return rc; +    } +    do{ +      if( pPage->pParent==0 ){ +        *pRes = 1; +        return STQLITE_OK; +      } +      moveToParent(pCur); +      pPage = pCur->pPage; +    }while( pCur->idx>=pPage->nCell ); +    *pRes = 0; +    return STQLITE_OK; +  } +  *pRes = 0; +  if( pPage->u.hdr.rightChild==0 ){ +    return STQLITE_OK; +  } +  rc = moveToLeftmost(pCur); +  return rc; +} + +/* +** Step the cursor to the back to the previous entry in the database.  If +** successful then set *pRes=0.  If the cursor +** was already pointing to the first entry in the database before +** this routine was called, then set *pRes=1. +*/ +static int fileBtreePrevious(BtCursor *pCur, int *pRes){ +  int rc; +  Pgno pgno; +  MemPage *pPage; +  pPage = pCur->pPage; +  if( pPage==0 ){ +    *pRes = 1; +    return STQLITE_ABORT; +  } +  assert( pPage->isInit ); +  assert( pCur->eSkip!=SKIP_INVALID ); +  if( pPage->nCell==0 ){ +    *pRes = 1; +    return STQLITE_OK; +  } +  if( pCur->eSkip==SKIP_PREV ){ +    pCur->eSkip = SKIP_NONE; +    *pRes = 0; +    return STQLITE_OK; +  } +  pCur->eSkip = SKIP_NONE; +  assert( pCur->idx>=0 ); +  if( (pgno = pPage->apCell[pCur->idx]->h.leftChild)!=0 ){ +    rc = moveToChild(pCur, pgno); +    if( rc ) return rc; +    rc = moveToRightmost(pCur); +  }else{ +    while( pCur->idx==0 ){ +      if( pPage->pParent==0 ){ +        if( pRes ) *pRes = 1; +        return STQLITE_OK; +      } +      moveToParent(pCur); +      pPage = pCur->pPage; +    } +    pCur->idx--; +    rc = STQLITE_OK; +  } +  *pRes = 0; +  return rc; +} + +/* +** Allocate a new page from the database file. +** +** The new page is marked as dirty.  (In other words, sqlitepager_write() +** has already been called on the new page.)  The new page has also +** been referenced and the calling routine is responsible for calling +** sqlitepager_unref() on the new page when it is done. +** +** STQLITE_OK is returned on success.  Any other return value indicates +** an error.  *ppPage and *pPgno are undefined in the event of an error. +** Do not invoke sqlitepager_unref() on *ppPage if an error is returned. +** +** If the "nearby" parameter is not 0, then a (feeble) effort is made to  +** locate a page close to the page number "nearby".  This can be used in an +** attempt to keep related pages close to each other in the database file, +** which in turn can make database access faster. +*/ +static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby){ +  PageOne *pPage1 = pBt->page1; +  int rc; +  if( pPage1->freeList ){ +    OverflowPage *pOvfl; +    FreelistInfo *pInfo; + +    rc = sqlitepager_write(pPage1); +    if( rc ) return rc; +    SWAB_ADD(pBt, pPage1->nFree, -1); +    rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList), +                        (void**)&pOvfl); +    if( rc ) return rc; +    rc = sqlitepager_write(pOvfl); +    if( rc ){ +      sqlitepager_unref(pOvfl); +      return rc; +    } +    pInfo = (FreelistInfo*)pOvfl->aPayload; +    if( pInfo->nFree==0 ){ +      *pPgno = SWAB32(pBt, pPage1->freeList); +      pPage1->freeList = pOvfl->iNext; +      *ppPage = (MemPage*)pOvfl; +    }else{ +      int closest, n; +      n = SWAB32(pBt, pInfo->nFree); +      if( n>1 && nearby>0 ){ +        int i, dist; +        closest = 0; +        dist = SWAB32(pBt, pInfo->aFree[0]) - nearby; +        if( dist<0 ) dist = -dist; +        for(i=1; i<n; i++){ +          int d2 = SWAB32(pBt, pInfo->aFree[i]) - nearby; +          if( d2<0 ) d2 = -d2; +          if( d2<dist ) closest = i; +        } +      }else{ +        closest = 0; +      } +      SWAB_ADD(pBt, pInfo->nFree, -1); +      *pPgno = SWAB32(pBt, pInfo->aFree[closest]); +      pInfo->aFree[closest] = pInfo->aFree[n-1]; +      rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage); +      sqlitepager_unref(pOvfl); +      if( rc==STQLITE_OK ){ +        sqlitepager_dont_rollback(*ppPage); +        rc = sqlitepager_write(*ppPage); +      } +    } +  }else{ +    *pPgno = sqlitepager_pagecount(pBt->pPager) + 1; +    rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage); +    if( rc ) return rc; +    rc = sqlitepager_write(*ppPage); +  } +  return rc; +} + +/* +** Add a page of the database file to the freelist.  Either pgno or +** pPage but not both may be 0.  +** +** sqlitepager_unref() is NOT called for pPage. +*/ +static int freePage(Btree *pBt, void *pPage, Pgno pgno){ +  PageOne *pPage1 = pBt->page1; +  OverflowPage *pOvfl = (OverflowPage*)pPage; +  int rc; +  int needUnref = 0; +  MemPage *pMemPage; + +  if( pgno==0 ){ +    assert( pOvfl!=0 ); +    pgno = sqlitepager_pagenumber(pOvfl); +  } +  assert( pgno>2 ); +  assert( sqlitepager_pagenumber(pOvfl)==pgno ); +  pMemPage = (MemPage*)pPage; +  pMemPage->isInit = 0; +  if( pMemPage->pParent ){ +    sqlitepager_unref(pMemPage->pParent); +    pMemPage->pParent = 0; +  } +  rc = sqlitepager_write(pPage1); +  if( rc ){ +    return rc; +  } +  SWAB_ADD(pBt, pPage1->nFree, 1); +  if( pPage1->nFree!=0 && pPage1->freeList!=0 ){ +    OverflowPage *pFreeIdx; +    rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList), +                        (void**)&pFreeIdx); +    if( rc==STQLITE_OK ){ +      FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload; +      int n = SWAB32(pBt, pInfo->nFree); +      if( n<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){ +        rc = sqlitepager_write(pFreeIdx); +        if( rc==STQLITE_OK ){ +          pInfo->aFree[n] = SWAB32(pBt, pgno); +          SWAB_ADD(pBt, pInfo->nFree, 1); +          sqlitepager_unref(pFreeIdx); +          sqlitepager_dont_write(pBt->pPager, pgno); +          return rc; +        } +      } +      sqlitepager_unref(pFreeIdx); +    } +  } +  if( pOvfl==0 ){ +    assert( pgno>0 ); +    rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl); +    if( rc ) return rc; +    needUnref = 1; +  } +  rc = sqlitepager_write(pOvfl); +  if( rc ){ +    if( needUnref ) sqlitepager_unref(pOvfl); +    return rc; +  } +  pOvfl->iNext = pPage1->freeList; +  pPage1->freeList = SWAB32(pBt, pgno); +  memset(pOvfl->aPayload, 0, OVERFLOW_SIZE); +  if( needUnref ) rc = sqlitepager_unref(pOvfl); +  return rc; +} + +/* +** Erase all the data out of a cell.  This involves returning overflow +** pages back the freelist. +*/ +static int clearCell(Btree *pBt, Cell *pCell){ +  Pager *pPager = pBt->pPager; +  OverflowPage *pOvfl; +  Pgno ovfl, nextOvfl; +  int rc; + +  if( NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h) <= MX_LOCAL_PAYLOAD ){ +    return STQLITE_OK; +  } +  ovfl = SWAB32(pBt, pCell->ovfl); +  pCell->ovfl = 0; +  while( ovfl ){ +    rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl); +    if( rc ) return rc; +    nextOvfl = SWAB32(pBt, pOvfl->iNext); +    rc = freePage(pBt, pOvfl, ovfl); +    if( rc ) return rc; +    sqlitepager_unref(pOvfl); +    ovfl = nextOvfl; +  } +  return STQLITE_OK; +} + +/* +** Create a new cell from key and data.  Overflow pages are allocated as +** necessary and linked to this cell.   +*/ +static int fillInCell( +  Btree *pBt,              /* The whole Btree.  Needed to allocate pages */ +  Cell *pCell,             /* Populate this Cell structure */ +  const void *pKey, int nKey,    /* The key */ +  const void *pData,int nData    /* The data */ +){ +  OverflowPage *pOvfl, *pPrior; +  Pgno *pNext; +  int spaceLeft; +  int n, rc; +  int nPayload; +  const char *pPayload; +  char *pSpace; +  Pgno nearby = 0; + +  pCell->h.leftChild = 0; +  pCell->h.nKey = SWAB16(pBt, nKey & 0xffff); +  pCell->h.nKeyHi = nKey >> 16; +  pCell->h.nData = SWAB16(pBt, nData & 0xffff); +  pCell->h.nDataHi = nData >> 16; +  pCell->h.iNext = 0; + +  pNext = &pCell->ovfl; +  pSpace = pCell->aPayload; +  spaceLeft = MX_LOCAL_PAYLOAD; +  pPayload = pKey; +  pKey = 0; +  nPayload = nKey; +  pPrior = 0; +  while( nPayload>0 ){ +    if( spaceLeft==0 ){ +      rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext, nearby); +      if( rc ){ +        *pNext = 0; +      }else{ +        nearby = *pNext; +      } +      if( pPrior ) sqlitepager_unref(pPrior); +      if( rc ){ +        clearCell(pBt, pCell); +        return rc; +      } +      if( pBt->needSwab ) *pNext = swab32(*pNext); +      pPrior = pOvfl; +      spaceLeft = OVERFLOW_SIZE; +      pSpace = pOvfl->aPayload; +      pNext = &pOvfl->iNext; +    } +    n = nPayload; +    if( n>spaceLeft ) n = spaceLeft; +    memcpy(pSpace, pPayload, n); +    nPayload -= n; +    if( nPayload==0 && pData ){ +      pPayload = pData; +      nPayload = nData; +      pData = 0; +    }else{ +      pPayload += n; +    } +    spaceLeft -= n; +    pSpace += n; +  } +  *pNext = 0; +  if( pPrior ){ +    sqlitepager_unref(pPrior); +  } +  return STQLITE_OK; +} + +/* +** Change the MemPage.pParent pointer on the page whose number is +** given in the second argument so that MemPage.pParent holds the +** pointer in the third argument. +*/ +static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent,int idx){ +  MemPage *pThis; + +  if( pgno==0 ) return; +  assert( pPager!=0 ); +  pThis = sqlitepager_lookup(pPager, pgno); +  if( pThis && pThis->isInit ){ +    if( pThis->pParent!=pNewParent ){ +      if( pThis->pParent ) sqlitepager_unref(pThis->pParent); +      pThis->pParent = pNewParent; +      if( pNewParent ) sqlitepager_ref(pNewParent); +    } +    pThis->idxParent = idx; +    sqlitepager_unref(pThis); +  } +} + +/* +** Retqparent all tqchildren of the given page to be the given page. +** In other words, for every child of pPage, invoke reparentPage() +** to make sure that each child knows that pPage is its tqparent. +** +** This routine gets called after you memcpy() one page into +** another. +*/ +static void reparentChildPages(Btree *pBt, MemPage *pPage){ +  int i; +  Pager *pPager = pBt->pPager; +  for(i=0; i<pPage->nCell; i++){ +    reparentPage(pPager, SWAB32(pBt, pPage->apCell[i]->h.leftChild), pPage, i); +  } +  reparentPage(pPager, SWAB32(pBt, pPage->u.hdr.rightChild), pPage, i); +  pPage->idxShift = 0; +} + +/* +** Remove the i-th cell from pPage.  This routine effects pPage only. +** The cell content is not freed or deallocated.  It is assumed that +** the cell content has been copied someplace else.  This routine just +** removes the reference to the cell from pPage. +** +** "sz" must be the number of bytes in the cell. +** +** Do not bother maintaining the integrity of the linked list of Cells. +** Only the pPage->apCell[] array is important.  The relinkCellList()  +** routine will be called soon after this routine in order to rebuild  +** the linked list. +*/ +static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){ +  int j; +  assert( idx>=0 && idx<pPage->nCell ); +  assert( sz==cellSize(pBt, pPage->apCell[idx]) ); +  assert( sqlitepager_iswriteable(pPage) ); +  freeSpace(pBt, pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz); +  for(j=idx; j<pPage->nCell-1; j++){ +    pPage->apCell[j] = pPage->apCell[j+1]; +  } +  pPage->nCell--; +  pPage->idxShift = 1; +} + +/* +** Insert a new cell on pPage at cell index "i".  pCell points to the +** content of the cell. +** +** If the cell content will fit on the page, then put it there.  If it +** will not fit, then just make pPage->apCell[i] point to the content +** and set pPage->isOverfull.   +** +** Do not bother maintaining the integrity of the linked list of Cells. +** Only the pPage->apCell[] array is important.  The relinkCellList()  +** routine will be called soon after this routine in order to rebuild  +** the linked list. +*/ +static void insertCell(Btree *pBt, MemPage *pPage, int i, Cell *pCell, int sz){ +  int idx, j; +  assert( i>=0 && i<=pPage->nCell ); +  assert( sz==cellSize(pBt, pCell) ); +  assert( sqlitepager_iswriteable(pPage) ); +  idx = allocateSpace(pBt, pPage, sz); +  for(j=pPage->nCell; j>i; j--){ +    pPage->apCell[j] = pPage->apCell[j-1]; +  } +  pPage->nCell++; +  if( idx<=0 ){ +    pPage->isOverfull = 1; +    pPage->apCell[i] = pCell; +  }else{ +    memcpy(&pPage->u.aDisk[idx], pCell, sz); +    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx]; +  } +  pPage->idxShift = 1; +} + +/* +** Rebuild the linked list of cells on a page so that the cells +** occur in the order specified by the pPage->apCell[] array.   +** Invoke this routine once to repair damage after one or more +** invocations of either insertCell() or dropCell(). +*/ +static void relinkCellList(Btree *pBt, MemPage *pPage){ +  int i; +  u16 *pIdx; +  assert( sqlitepager_iswriteable(pPage) ); +  pIdx = &pPage->u.hdr.firstCell; +  for(i=0; i<pPage->nCell; i++){ +    int idx = Addr(pPage->apCell[i]) - Addr(pPage); +    assert( idx>0 && idx<STQLITE_USABLE_SIZE ); +    *pIdx = SWAB16(pBt, idx); +    pIdx = &pPage->apCell[i]->h.iNext; +  } +  *pIdx = 0; +} + +/* +** Make a copy of the contents of pFrom into pTo.  The pFrom->apCell[] +** pointers that point into pFrom->u.aDisk[] must be adjusted to point +** into pTo->u.aDisk[] instead.  But some pFrom->apCell[] entries might +** not point to pFrom->u.aDisk[].  Those are unchanged. +*/ +static void copyPage(MemPage *pTo, MemPage *pFrom){ +  uptr from, to; +  int i; +  memcpy(pTo->u.aDisk, pFrom->u.aDisk, STQLITE_USABLE_SIZE); +  pTo->pParent = 0; +  pTo->isInit = 1; +  pTo->nCell = pFrom->nCell; +  pTo->nFree = pFrom->nFree; +  pTo->isOverfull = pFrom->isOverfull; +  to = Addr(pTo); +  from = Addr(pFrom); +  for(i=0; i<pTo->nCell; i++){ +    uptr x = Addr(pFrom->apCell[i]); +    if( x>from && x<from+STQLITE_USABLE_SIZE ){ +      *((uptr*)&pTo->apCell[i]) = x + to - from; +    }else{ +      pTo->apCell[i] = pFrom->apCell[i]; +    } +  } +} + +/* +** The following parameters determine how many adjacent pages get involved +** in a balancing operation.  NN is the number of neighbors on either side +** of the page that participate in the balancing operation.  NB is the +** total number of pages that participate, including the target page and +** NN neighbors on either side. +** +** The minimum value of NN is 1 (of course).  Increasing NN above 1 +** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance +** in exchange for a larger degradation in INSERT and UPDATE performance. +** The value of NN appears to give the best results overall. +*/ +#define NN 1             /* Number of neighbors on either side of pPage */ +#define NB (NN*2+1)      /* Total pages involved in the balance */ + +/* +** This routine redistributes Cells on pPage and up to two siblings +** of pPage so that all pages have about the same amount of free space. +** Usually one sibling on either side of pPage is used in the balancing, +** though both siblings might come from one side if pPage is the first +** or last child of its tqparent.  If pPage has fewer than two siblings +** (something which can only happen if pPage is the root page or a  +** child of root) then all available siblings participate in the balancing. +** +** The number of siblings of pPage might be increased or decreased by +** one in an effort to keep pages between 66% and 100% full. The root page +** is special and is allowed to be less than 66% full. If pPage is  +** the root page, then the depth of the tree might be increased +** or decreased by one, as necessary, to keep the root page from being +** overfull or empty. +** +** This routine calls relinkCellList() on its input page regardless of +** whether or not it does any real balancing.  Client routines will typically +** invoke insertCell() or dropCell() before calling this routine, so we +** need to call relinkCellList() to clean up the mess that those other +** routines left behind. +** +** pCur is left pointing to the same cell as when this routine was called +** even if that cell gets moved to a different page.  pCur may be NULL. +** Set the pCur parameter to NULL if you do not care about keeping track +** of a cell as that will save this routine the work of keeping track of it. +** +** Note that when this routine is called, some of the Cells on pPage +** might not actually be stored in pPage->u.aDisk[].  This can happen +** if the page is overfull.  Part of the job of this routine is to +** make sure all Cells for pPage once again fit in pPage->u.aDisk[]. +** +** In the course of balancing the siblings of pPage, the tqparent of pPage +** might become overfull or underfull.  If that happens, then this routine +** is called recursively on the tqparent. +** +** If this routine fails for any reason, it might leave the database +** in a corrupted state.  So if this routine fails, the database should +** be rolled back. +*/ +static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){ +  MemPage *pParent;            /* The tqparent of pPage */ +  int nCell;                   /* Number of cells in apCell[] */ +  int nOld;                    /* Number of pages in apOld[] */ +  int nNew;                    /* Number of pages in apNew[] */ +  int nDiv;                    /* Number of cells in apDiv[] */ +  int i, j, k;                 /* Loop counters */ +  int idx;                     /* Index of pPage in pParent->apCell[] */ +  int nxDiv;                   /* Next divider slot in pParent->apCell[] */ +  int rc;                      /* The return code */ +  int iCur;                    /* apCell[iCur] is the cell of the cursor */ +  MemPage *pOldCurPage;        /* The cursor originally points to this page */ +  int subtotal;                /* Subtotal of bytes in cells on one page */ +  MemPage *extraUnref = 0;     /* A page that needs to be unref-ed */ +  MemPage *apOld[NB];          /* pPage and up to two siblings */ +  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */ +  MemPage *apNew[NB+1];        /* pPage and up to NB siblings after balancing */ +  Pgno pgnoNew[NB+1];          /* Page numbers for each page in apNew[] */ +  int idxDiv[NB];              /* Indices of divider cells in pParent */ +  Cell *apDiv[NB];             /* Divider cells in pParent */ +  Cell aTemp[NB];              /* Temporary holding area for apDiv[] */ +  int cntNew[NB+1];            /* Index in apCell[] of cell after i-th page */ +  int szNew[NB+1];             /* Combined size of cells place on i-th page */ +  MemPage aOld[NB];            /* Temporary copies of pPage and its siblings */ +  Cell *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */ +  int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */ + +  /*  +  ** Return without doing any work if pPage is neither overfull nor +  ** underfull. +  */ +  assert( sqlitepager_iswriteable(pPage) ); +  if( !pPage->isOverfull && pPage->nFree<STQLITE_USABLE_SIZE/2  +        && pPage->nCell>=2){ +    relinkCellList(pBt, pPage); +    return STQLITE_OK; +  } + +  /* +  ** Find the tqparent of the page to be balanceed. +  ** If there is no tqparent, it means this page is the root page and +  ** special rules apply. +  */ +  pParent = pPage->pParent; +  if( pParent==0 ){ +    Pgno pgnoChild; +    MemPage *pChild; +    assert( pPage->isInit ); +    if( pPage->nCell==0 ){ +      if( pPage->u.hdr.rightChild ){ +        /* +        ** The root page is empty.  Copy the one child page +        ** into the root page and return.  This reduces the depth +        ** of the BTree by one. +        */ +        pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild); +        rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild); +        if( rc ) return rc; +        memcpy(pPage, pChild, STQLITE_USABLE_SIZE); +        pPage->isInit = 0; +        rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0); +        assert( rc==STQLITE_OK ); +        reparentChildPages(pBt, pPage); +        if( pCur && pCur->pPage==pChild ){ +          sqlitepager_unref(pChild); +          pCur->pPage = pPage; +          sqlitepager_ref(pPage); +        } +        freePage(pBt, pChild, pgnoChild); +        sqlitepager_unref(pChild); +      }else{ +        relinkCellList(pBt, pPage); +      } +      return STQLITE_OK; +    } +    if( !pPage->isOverfull ){ +      /* It is OK for the root page to be less than half full. +      */ +      relinkCellList(pBt, pPage); +      return STQLITE_OK; +    } +    /* +    ** If we get to here, it means the root page is overfull. +    ** When this happens, Create a new child page and copy the +    ** contents of the root into the child.  Then make the root +    ** page an empty page with rightChild pointing to the new +    ** child.  Then fall thru to the code below which will cause +    ** the overfull child page to be split. +    */ +    rc = sqlitepager_write(pPage); +    if( rc ) return rc; +    rc = allocatePage(pBt, &pChild, &pgnoChild, sqlitepager_pagenumber(pPage)); +    if( rc ) return rc; +    assert( sqlitepager_iswriteable(pChild) ); +    copyPage(pChild, pPage); +    pChild->pParent = pPage; +    pChild->idxParent = 0; +    sqlitepager_ref(pPage); +    pChild->isOverfull = 1; +    if( pCur && pCur->pPage==pPage ){ +      sqlitepager_unref(pPage); +      pCur->pPage = pChild; +    }else{ +      extraUnref = pChild; +    } +    zeroPage(pBt, pPage); +    pPage->u.hdr.rightChild = SWAB32(pBt, pgnoChild); +    pParent = pPage; +    pPage = pChild; +  } +  rc = sqlitepager_write(pParent); +  if( rc ) return rc; +  assert( pParent->isInit ); +   +  /* +  ** Find the Cell in the tqparent page whose h.leftChild points back +  ** to pPage.  The "idx" variable is the index of that cell.  If pPage +  ** is the rightmost child of pParent then set idx to pParent->nCell  +  */ +  if( pParent->idxShift ){ +    Pgno pgno, swabPgno; +    pgno = sqlitepager_pagenumber(pPage); +    swabPgno = SWAB32(pBt, pgno); +    for(idx=0; idx<pParent->nCell; idx++){ +      if( pParent->apCell[idx]->h.leftChild==swabPgno ){ +        break; +      } +    } +    assert( idx<pParent->nCell || pParent->u.hdr.rightChild==swabPgno ); +  }else{ +    idx = pPage->idxParent; +  } + +  /* +  ** Initialize variables so that it will be safe to jump +  ** directly to balance_cleanup at any moment. +  */ +  nOld = nNew = 0; +  sqlitepager_ref(pParent); + +  /* +  ** Find sibling pages to pPage and the Cells in pParent that divide +  ** the siblings.  An attempt is made to tqfind NN siblings on either +  ** side of pPage.  More siblings are taken from one side, however, if +  ** pPage there are fewer than NN siblings on the other side.  If pParent +  ** has NB or fewer tqchildren then all tqchildren of pParent are taken. +  */ +  nxDiv = idx - NN; +  if( nxDiv + NB > pParent->nCell ){ +    nxDiv = pParent->nCell - NB + 1; +  } +  if( nxDiv<0 ){ +    nxDiv = 0; +  } +  nDiv = 0; +  for(i=0, k=nxDiv; i<NB; i++, k++){ +    if( k<pParent->nCell ){ +      idxDiv[i] = k; +      apDiv[i] = pParent->apCell[k]; +      nDiv++; +      pgnoOld[i] = SWAB32(pBt, apDiv[i]->h.leftChild); +    }else if( k==pParent->nCell ){ +      pgnoOld[i] = SWAB32(pBt, pParent->u.hdr.rightChild); +    }else{ +      break; +    } +    rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]); +    if( rc ) goto balance_cleanup; +    rc = initPage(pBt, apOld[i], pgnoOld[i], pParent); +    if( rc ) goto balance_cleanup; +    apOld[i]->idxParent = k; +    nOld++; +  } + +  /* +  ** Set iCur to be the index in apCell[] of the cell that the cursor +  ** is pointing to.  We will need this later on in order to keep the +  ** cursor pointing at the same cell.  If pCur points to a page that +  ** has no involvement with this rebalancing, then set iCur to a large +  ** number so that the iCur==j tests always fail in the main cell +  ** distribution loop below. +  */ +  if( pCur ){ +    iCur = 0; +    for(i=0; i<nOld; i++){ +      if( pCur->pPage==apOld[i] ){ +        iCur += pCur->idx; +        break; +      } +      iCur += apOld[i]->nCell; +      if( i<nOld-1 && pCur->pPage==pParent && pCur->idx==idxDiv[i] ){ +        break; +      } +      iCur++; +    } +    pOldCurPage = pCur->pPage; +  } + +  /* +  ** Make copies of the content of pPage and its siblings into aOld[]. +  ** The rest of this function will use data from the copies rather +  ** that the original pages since the original pages will be in the +  ** process of being overwritten. +  */ +  for(i=0; i<nOld; i++){ +    copyPage(&aOld[i], apOld[i]); +  } + +  /* +  ** Load pointers to all cells on sibling pages and the divider cells +  ** into the local apCell[] array.  Make copies of the divider cells +  ** into aTemp[] and remove the the divider Cells from pParent. +  */ +  nCell = 0; +  for(i=0; i<nOld; i++){ +    MemPage *pOld = &aOld[i]; +    for(j=0; j<pOld->nCell; j++){ +      apCell[nCell] = pOld->apCell[j]; +      szCell[nCell] = cellSize(pBt, apCell[nCell]); +      nCell++; +    } +    if( i<nOld-1 ){ +      szCell[nCell] = cellSize(pBt, apDiv[i]); +      memcpy(&aTemp[i], apDiv[i], szCell[nCell]); +      apCell[nCell] = &aTemp[i]; +      dropCell(pBt, pParent, nxDiv, szCell[nCell]); +      assert( SWAB32(pBt, apCell[nCell]->h.leftChild)==pgnoOld[i] ); +      apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild; +      nCell++; +    } +  } + +  /* +  ** Figure out the number of pages needed to hold all nCell cells. +  ** Store this number in "k".  Also compute szNew[] which is the total +  ** size of all cells on the i-th page and cntNew[] which is the index +  ** in apCell[] of the cell that divides path i from path i+1.   +  ** cntNew[k] should equal nCell. +  ** +  ** This little patch of code is critical for keeping the tree +  ** balanced.  +  */ +  for(subtotal=k=i=0; i<nCell; i++){ +    subtotal += szCell[i]; +    if( subtotal > USABLE_SPACE ){ +      szNew[k] = subtotal - szCell[i]; +      cntNew[k] = i; +      subtotal = 0; +      k++; +    } +  } +  szNew[k] = subtotal; +  cntNew[k] = nCell; +  k++; +  for(i=k-1; i>0; i--){ +    while( szNew[i]<USABLE_SPACE/2 ){ +      cntNew[i-1]--; +      assert( cntNew[i-1]>0 ); +      szNew[i] += szCell[cntNew[i-1]]; +      szNew[i-1] -= szCell[cntNew[i-1]-1]; +    } +  } +  assert( cntNew[0]>0 ); + +  /* +  ** Allocate k new pages.  Reuse old pages where possible. +  */ +  for(i=0; i<k; i++){ +    if( i<nOld ){ +      apNew[i] = apOld[i]; +      pgnoNew[i] = pgnoOld[i]; +      apOld[i] = 0; +      sqlitepager_write(apNew[i]); +    }else{ +      rc = allocatePage(pBt, &apNew[i], &pgnoNew[i], pgnoNew[i-1]); +      if( rc ) goto balance_cleanup; +    } +    nNew++; +    zeroPage(pBt, apNew[i]); +    apNew[i]->isInit = 1; +  } + +  /* Free any old pages that were not reused as new pages. +  */ +  while( i<nOld ){ +    rc = freePage(pBt, apOld[i], pgnoOld[i]); +    if( rc ) goto balance_cleanup; +    sqlitepager_unref(apOld[i]); +    apOld[i] = 0; +    i++; +  } + +  /* +  ** Put the new pages in accending order.  This helps to +  ** keep entries in the disk file in order so that a scan +  ** of the table is a linear scan through the file.  That +  ** in turn helps the operating system to deliver pages +  ** from the disk more rapidly. +  ** +  ** An O(n^2) insertion sort algorithm is used, but since +  ** n is never more than NB (a small constant), that should +  ** not be a problem. +  ** +  ** When NB==3, this one optimization makes the database +  ** about 25% faster for large insertions and deletions. +  */ +  for(i=0; i<k-1; i++){ +    int minV = pgnoNew[i]; +    int minI = i; +    for(j=i+1; j<k; j++){ +      if( pgnoNew[j]<(unsigned)minV ){ +        minI = j; +        minV = pgnoNew[j]; +      } +    } +    if( minI>i ){ +      int t; +      MemPage *pT; +      t = pgnoNew[i]; +      pT = apNew[i]; +      pgnoNew[i] = pgnoNew[minI]; +      apNew[i] = apNew[minI]; +      pgnoNew[minI] = t; +      apNew[minI] = pT; +    } +  } + +  /* +  ** Evenly distribute the data in apCell[] across the new pages. +  ** Insert divider cells into pParent as necessary. +  */ +  j = 0; +  for(i=0; i<nNew; i++){ +    MemPage *pNew = apNew[i]; +    while( j<cntNew[i] ){ +      assert( pNew->nFree>=szCell[j] ); +      if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; } +      insertCell(pBt, pNew, pNew->nCell, apCell[j], szCell[j]); +      j++; +    } +    assert( pNew->nCell>0 ); +    assert( !pNew->isOverfull ); +    relinkCellList(pBt, pNew); +    if( i<nNew-1 && j<nCell ){ +      pNew->u.hdr.rightChild = apCell[j]->h.leftChild; +      apCell[j]->h.leftChild = SWAB32(pBt, pgnoNew[i]); +      if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; } +      insertCell(pBt, pParent, nxDiv, apCell[j], szCell[j]); +      j++; +      nxDiv++; +    } +  } +  assert( j==nCell ); +  apNew[nNew-1]->u.hdr.rightChild = aOld[nOld-1].u.hdr.rightChild; +  if( nxDiv==pParent->nCell ){ +    pParent->u.hdr.rightChild = SWAB32(pBt, pgnoNew[nNew-1]); +  }else{ +    pParent->apCell[nxDiv]->h.leftChild = SWAB32(pBt, pgnoNew[nNew-1]); +  } +  if( pCur ){ +    if( j<=iCur && pCur->pPage==pParent && pCur->idx>idxDiv[nOld-1] ){ +      assert( pCur->pPage==pOldCurPage ); +      pCur->idx += nNew - nOld; +    }else{ +      assert( pOldCurPage!=0 ); +      sqlitepager_ref(pCur->pPage); +      sqlitepager_unref(pOldCurPage); +    } +  } + +  /* +  ** Retqparent tqchildren of all cells. +  */ +  for(i=0; i<nNew; i++){ +    reparentChildPages(pBt, apNew[i]); +  } +  reparentChildPages(pBt, pParent); + +  /* +  ** balance the tqparent page. +  */ +  rc = balance(pBt, pParent, pCur); + +  /* +  ** Cleanup before returning. +  */ +balance_cleanup: +  if( extraUnref ){ +    sqlitepager_unref(extraUnref); +  } +  for(i=0; i<nOld; i++){ +    if( apOld[i]!=0 && apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]); +  } +  for(i=0; i<nNew; i++){ +    sqlitepager_unref(apNew[i]); +  } +  if( pCur && pCur->pPage==0 ){ +    pCur->pPage = pParent; +    pCur->idx = 0; +  }else{ +    sqlitepager_unref(pParent); +  } +  return rc; +} + +/* +** This routine checks all cursors that point to the same table +** as pCur points to.  If any of those cursors were opened with +** wrFlag==0 then this routine returns STQLITE_LOCKED.  If all +** cursors point to the same table were opened with wrFlag==1 +** then this routine returns STQLITE_OK. +** +** In addition to checking for read-locks (where a read-lock  +** means a cursor opened with wrFlag==0) this routine also moves +** all cursors other than pCur so that they are pointing to the  +** first Cell on root page.  This is necessary because an insert  +** or delete might change the number of cells on a page or delete +** a page entirely and we do not want to leave any cursors  +** pointing to non-existant pages or cells. +*/ +static int checkReadLocks(BtCursor *pCur){ +  BtCursor *p; +  assert( pCur->wrFlag ); +  for(p=pCur->pShared; p!=pCur; p=p->pShared){ +    assert( p ); +    assert( p->pgnoRoot==pCur->pgnoRoot ); +    if( p->wrFlag==0 ) return STQLITE_LOCKED; +    if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){ +      moveToRoot(p); +    } +  } +  return STQLITE_OK; +} + +/* +** Insert a new record into the BTree.  The key is given by (pKey,nKey) +** and the data is given by (pData,nData).  The cursor is used only to +** define what database the record should be inserted into.  The cursor +** is left pointing at the new record. +*/ +static int fileBtreeInsert( +  BtCursor *pCur,                /* Insert data into the table of this cursor */ +  const void *pKey, int nKey,    /* The key of the new record */ +  const void *pData, int nData   /* The data of the new record */ +){ +  Cell newCell; +  int rc; +  int loc; +  int szNew; +  MemPage *pPage; +  Btree *pBt = pCur->pBt; + +  if( pCur->pPage==0 ){ +    return STQLITE_ABORT;  /* A rollback destroyed this cursor */ +  } +  if( !pBt->inTrans || nKey+nData==0 ){ +    /* Must start a transaction before doing an insert */ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  assert( !pBt->readOnly ); +  if( !pCur->wrFlag ){ +    return STQLITE_PERM;   /* Cursor not open for writing */ +  } +  if( checkReadLocks(pCur) ){ +    return STQLITE_LOCKED; /* The table pCur points to has a read lock */ +  } +  rc = fileBtreeMoveto(pCur, pKey, nKey, &loc); +  if( rc ) return rc; +  pPage = pCur->pPage; +  assert( pPage->isInit ); +  rc = sqlitepager_write(pPage); +  if( rc ) return rc; +  rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData); +  if( rc ) return rc; +  szNew = cellSize(pBt, &newCell); +  if( loc==0 ){ +    newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild; +    rc = clearCell(pBt, pPage->apCell[pCur->idx]); +    if( rc ) return rc; +    dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pPage->apCell[pCur->idx])); +  }else if( loc<0 && pPage->nCell>0 ){ +    assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */ +    pCur->idx++; +  }else{ +    assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */ +  } +  insertCell(pBt, pPage, pCur->idx, &newCell, szNew); +  rc = balance(pCur->pBt, pPage, pCur); +  /* sqliteBtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */ +  /* fflush(stdout); */ +  pCur->eSkip = SKIP_INVALID; +  return rc; +} + +/* +** Delete the entry that the cursor is pointing to. +** +** The cursor is left pointing at either the next or the previous +** entry.  If the cursor is left pointing to the next entry, then  +** the pCur->eSkip flag is set to SKIP_NEXT which forces the next call to  +** sqliteBtreeNext() to be a no-op.  That way, you can always call +** sqliteBtreeNext() after a delete and the cursor will be left +** pointing to the first entry after the deleted entry.  Similarly, +** pCur->eSkip is set to SKIP_PREV is the cursor is left pointing to +** the entry prior to the deleted entry so that a subsequent call to +** sqliteBtreePrevious() will always leave the cursor pointing at the +** entry immediately before the one that was deleted. +*/ +static int fileBtreeDelete(BtCursor *pCur){ +  MemPage *pPage = pCur->pPage; +  Cell *pCell; +  int rc; +  Pgno pgnoChild; +  Btree *pBt = pCur->pBt; + +  assert( pPage->isInit ); +  if( pCur->pPage==0 ){ +    return STQLITE_ABORT;  /* A rollback destroyed this cursor */ +  } +  if( !pBt->inTrans ){ +    /* Must start a transaction before doing a delete */ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  assert( !pBt->readOnly ); +  if( pCur->idx >= pPage->nCell ){ +    return STQLITE_ERROR;  /* The cursor is not pointing to anything */ +  } +  if( !pCur->wrFlag ){ +    return STQLITE_PERM;   /* Did not open this cursor for writing */ +  } +  if( checkReadLocks(pCur) ){ +    return STQLITE_LOCKED; /* The table pCur points to has a read lock */ +  } +  rc = sqlitepager_write(pPage); +  if( rc ) return rc; +  pCell = pPage->apCell[pCur->idx]; +  pgnoChild = SWAB32(pBt, pCell->h.leftChild); +  clearCell(pBt, pCell); +  if( pgnoChild ){ +    /* +    ** The entry we are about to delete is not a leaf so if we do not +    ** do something we will leave a hole on an internal page. +    ** We have to fill the hole by moving in a cell from a leaf.  The +    ** next Cell after the one to be deleted is guaranteed to exist and +    ** to be a leaf so we can use it. +    */ +    BtCursor leafCur; +    Cell *pNext; +    int szNext; +    int notUsed; +    getTempCursor(pCur, &leafCur); +    rc = fileBtreeNext(&leafCur, ¬Used); +    if( rc!=STQLITE_OK ){ +      if( rc!=STQLITE_NOMEM ) rc = STQLITE_CORRUPT; +      return rc; +    } +    rc = sqlitepager_write(leafCur.pPage); +    if( rc ) return rc; +    dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell)); +    pNext = leafCur.pPage->apCell[leafCur.idx]; +    szNext = cellSize(pBt, pNext); +    pNext->h.leftChild = SWAB32(pBt, pgnoChild); +    insertCell(pBt, pPage, pCur->idx, pNext, szNext); +    rc = balance(pBt, pPage, pCur); +    if( rc ) return rc; +    pCur->eSkip = SKIP_NEXT; +    dropCell(pBt, leafCur.pPage, leafCur.idx, szNext); +    rc = balance(pBt, leafCur.pPage, pCur); +    releaseTempCursor(&leafCur); +  }else{ +    dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell)); +    if( pCur->idx>=pPage->nCell ){ +      pCur->idx = pPage->nCell-1; +      if( pCur->idx<0 ){  +        pCur->idx = 0; +        pCur->eSkip = SKIP_NEXT; +      }else{ +        pCur->eSkip = SKIP_PREV; +      } +    }else{ +      pCur->eSkip = SKIP_NEXT; +    } +    rc = balance(pBt, pPage, pCur); +  } +  return rc; +} + +/* +** Create a new BTree table.  Write into *piTable the page +** number for the root page of the new table. +** +** In the current implementation, BTree tables and BTree indices are the  +** the same.  In the future, we may change this so that BTree tables +** are restricted to having a 4-byte integer key and arbitrary data and +** BTree indices are restricted to having an arbitrary key and no data. +** But for now, this routine also serves to create indices. +*/ +static int fileBtreeCreateTable(Btree *pBt, int *piTable){ +  MemPage *pRoot; +  Pgno pgnoRoot; +  int rc; +  if( !pBt->inTrans ){ +    /* Must start a transaction first */ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  if( pBt->readOnly ){ +    return STQLITE_READONLY; +  } +  rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0); +  if( rc ) return rc; +  assert( sqlitepager_iswriteable(pRoot) ); +  zeroPage(pBt, pRoot); +  sqlitepager_unref(pRoot); +  *piTable = (int)pgnoRoot; +  return STQLITE_OK; +} + +/* +** Erase the given database page and all its tqchildren.  Return +** the page to the freelist. +*/ +static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){ +  MemPage *pPage; +  int rc; +  Cell *pCell; +  int idx; + +  rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage); +  if( rc ) return rc; +  rc = sqlitepager_write(pPage); +  if( rc ) return rc; +  rc = initPage(pBt, pPage, pgno, 0); +  if( rc ) return rc; +  idx = SWAB16(pBt, pPage->u.hdr.firstCell); +  while( idx>0 ){ +    pCell = (Cell*)&pPage->u.aDisk[idx]; +    idx = SWAB16(pBt, pCell->h.iNext); +    if( pCell->h.leftChild ){ +      rc = clearDatabasePage(pBt, SWAB32(pBt, pCell->h.leftChild), 1); +      if( rc ) return rc; +    } +    rc = clearCell(pBt, pCell); +    if( rc ) return rc; +  } +  if( pPage->u.hdr.rightChild ){ +    rc = clearDatabasePage(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1); +    if( rc ) return rc; +  } +  if( freePageFlag ){ +    rc = freePage(pBt, pPage, pgno); +  }else{ +    zeroPage(pBt, pPage); +  } +  sqlitepager_unref(pPage); +  return rc; +} + +/* +** Delete all information from a single table in the database. +*/ +static int fileBtreeClearTable(Btree *pBt, int iTable){ +  int rc; +  BtCursor *pCur; +  if( !pBt->inTrans ){ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ +    if( pCur->pgnoRoot==(Pgno)iTable ){ +      if( pCur->wrFlag==0 ) return STQLITE_LOCKED; +      moveToRoot(pCur); +    } +  } +  rc = clearDatabasePage(pBt, (Pgno)iTable, 0); +  if( rc ){ +    fileBtreeRollback(pBt); +  } +  return rc; +} + +/* +** Erase all information in a table and add the root of the table to +** the freelist.  Except, the root of the principle table (the one on +** page 2) is never added to the freelist. +*/ +static int fileBtreeDropTable(Btree *pBt, int iTable){ +  int rc; +  MemPage *pPage; +  BtCursor *pCur; +  if( !pBt->inTrans ){ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ +    if( pCur->pgnoRoot==(Pgno)iTable ){ +      return STQLITE_LOCKED;  /* Cannot drop a table that has a cursor */ +    } +  } +  rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage); +  if( rc ) return rc; +  rc = fileBtreeClearTable(pBt, iTable); +  if( rc ) return rc; +  if( iTable>2 ){ +    rc = freePage(pBt, pPage, iTable); +  }else{ +    zeroPage(pBt, pPage); +  } +  sqlitepager_unref(pPage); +  return rc;   +} + +#if 0 /* UNTESTED */ +/* +** Copy all cell data from one database file into another. +** pages back the freelist. +*/ +static int copyCell(Btree *pBtFrom, BTree *pBtTo, Cell *pCell){ +  Pager *pFromPager = pBtFrom->pPager; +  OverflowPage *pOvfl; +  Pgno ovfl, nextOvfl; +  Pgno *pPrev; +  int rc = STQLITE_OK; +  MemPage *pNew, *pPrevPg; +  Pgno new; + +  if( NKEY(pBtTo, pCell->h) + NDATA(pBtTo, pCell->h) <= MX_LOCAL_PAYLOAD ){ +    return STQLITE_OK; +  } +  pPrev = &pCell->ovfl; +  pPrevPg = 0; +  ovfl = SWAB32(pBtTo, pCell->ovfl); +  while( ovfl && rc==STQLITE_OK ){ +    rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl); +    if( rc ) return rc; +    nextOvfl = SWAB32(pBtFrom, pOvfl->iNext); +    rc = allocatePage(pBtTo, &pNew, &new, 0); +    if( rc==STQLITE_OK ){ +      rc = sqlitepager_write(pNew); +      if( rc==STQLITE_OK ){ +        memcpy(pNew, pOvfl, STQLITE_USABLE_SIZE); +        *pPrev = SWAB32(pBtTo, new); +        if( pPrevPg ){ +          sqlitepager_unref(pPrevPg); +        } +        pPrev = &pOvfl->iNext; +        pPrevPg = pNew; +      } +    } +    sqlitepager_unref(pOvfl); +    ovfl = nextOvfl; +  } +  if( pPrevPg ){ +    sqlitepager_unref(pPrevPg); +  } +  return rc; +} +#endif + + +#if 0 /* UNTESTED */ +/* +** Copy a page of data from one database over to another. +*/ +static int copyDatabasePage( +  Btree *pBtFrom, +  Pgno pgnoFrom, +  Btree *pBtTo, +  Pgno *pTo +){ +  MemPage *pPageFrom, *pPage; +  Pgno to; +  int rc; +  Cell *pCell; +  int idx; + +  rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom); +  if( rc ) return rc; +  rc = allocatePage(pBt, &pPage, pTo, 0); +  if( rc==STQLITE_OK ){ +    rc = sqlitepager_write(pPage); +  } +  if( rc==STQLITE_OK ){ +    memcpy(pPage, pPageFrom, STQLITE_USABLE_SIZE); +    idx = SWAB16(pBt, pPage->u.hdr.firstCell); +    while( idx>0 ){ +      pCell = (Cell*)&pPage->u.aDisk[idx]; +      idx = SWAB16(pBt, pCell->h.iNext); +      if( pCell->h.leftChild ){ +        Pgno newChld; +        rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild), +                              pBtTo, &newChld); +        if( rc ) return rc; +        pCell->h.leftChild = SWAB32(pBtFrom, newChld); +      } +      rc = copyCell(pBtFrom, pBtTo, pCell); +      if( rc ) return rc; +    } +    if( pPage->u.hdr.rightChild ){ +      Pgno newChld; +      rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pPage->u.hdr.rightChild),  +                            pBtTo, &newChld); +      if( rc ) return rc; +      pPage->u.hdr.rightChild = SWAB32(pBtTo, newChild); +    } +  } +  sqlitepager_unref(pPage); +  return rc; +} +#endif + +/* +** Read the meta-information out of a database file. +*/ +static int fileBtreeGetMeta(Btree *pBt, int *aMeta){ +  PageOne *pP1; +  int rc; +  int i; + +  rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1); +  if( rc ) return rc; +  aMeta[0] = SWAB32(pBt, pP1->nFree); +  for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){ +    aMeta[i+1] = SWAB32(pBt, pP1->aMeta[i]); +  } +  sqlitepager_unref(pP1); +  return STQLITE_OK; +} + +/* +** Write meta-information back into the database. +*/ +static int fileBtreeUpdateMeta(Btree *pBt, int *aMeta){ +  PageOne *pP1; +  int rc, i; +  if( !pBt->inTrans ){ +    return pBt->readOnly ? STQLITE_READONLY : STQLITE_ERROR; +  } +  pP1 = pBt->page1; +  rc = sqlitepager_write(pP1); +  if( rc ) return rc;    +  for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){ +    pP1->aMeta[i] = SWAB32(pBt, aMeta[i+1]); +  } +  return STQLITE_OK; +} + +/****************************************************************************** +** The complete implementation of the BTree subsystem is above this line. +** All the code the follows is for testing and troubleshooting the BTree +** subsystem.  None of the code that follows is used during normal operation. +******************************************************************************/ + +/* +** Print a disassembly of the given page on standard output.  This routine +** is used for debugging and testing only. +*/ +#ifdef STQLITE_TEST +static int fileBtreePageDump(Btree *pBt, int pgno, int recursive){ +  int rc; +  MemPage *pPage; +  int i, j; +  int nFree; +  u16 idx; +  char range[20]; +  unsigned char payload[20]; +  rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage); +  if( rc ){ +    return rc; +  } +  if( recursive ) printf("PAGE %d:\n", pgno); +  i = 0; +  idx = SWAB16(pBt, pPage->u.hdr.firstCell); +  while( idx>0 && idx<=STQLITE_USABLE_SIZE-MIN_CELL_SIZE ){ +    Cell *pCell = (Cell*)&pPage->u.aDisk[idx]; +    int sz = cellSize(pBt, pCell); +    sprintf(range,"%d..%d", idx, idx+sz-1); +    sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h); +    if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1; +    memcpy(payload, pCell->aPayload, sz); +    for(j=0; j<sz; j++){ +      if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.'; +    } +    payload[sz] = 0; +    printf( +      "cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n", +      i, range, (int)pCell->h.leftChild,  +      NKEY(pBt, pCell->h), NDATA(pBt, pCell->h), +      payload +    ); +    if( pPage->isInit && pPage->apCell[i]!=pCell ){ +      printf("**** apCell[%d] does not match on prior entry ****\n", i); +    } +    i++; +    idx = SWAB16(pBt, pCell->h.iNext); +  } +  if( idx!=0 ){ +    printf("ERROR: next cell index out of range: %d\n", idx); +  } +  printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild)); +  nFree = 0; +  i = 0; +  idx = SWAB16(pBt, pPage->u.hdr.firstFree); +  while( idx>0 && idx<STQLITE_USABLE_SIZE ){ +    FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx]; +    sprintf(range,"%d..%d", idx, idx+p->iSize-1); +    nFree += SWAB16(pBt, p->iSize); +    printf("freeblock %2d: i=%-10s size=%-4d total=%d\n", +       i, range, SWAB16(pBt, p->iSize), nFree); +    idx = SWAB16(pBt, p->iNext); +    i++; +  } +  if( idx!=0 ){ +    printf("ERROR: next freeblock index out of range: %d\n", idx); +  } +  if( recursive && pPage->u.hdr.rightChild!=0 ){ +    idx = SWAB16(pBt, pPage->u.hdr.firstCell); +    while( idx>0 && idx<STQLITE_USABLE_SIZE-MIN_CELL_SIZE ){ +      Cell *pCell = (Cell*)&pPage->u.aDisk[idx]; +      fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1); +      idx = SWAB16(pBt, pCell->h.iNext); +    } +    fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1); +  } +  sqlitepager_unref(pPage); +  return STQLITE_OK; +} +#endif + +#ifdef STQLITE_TEST +/* +** Fill aResult[] with information about the entry and page that the +** cursor is pointing to. +**  +**   aResult[0] =  The page number +**   aResult[1] =  The entry number +**   aResult[2] =  Total number of entries on this page +**   aResult[3] =  Size of this entry +**   aResult[4] =  Number of free bytes on this page +**   aResult[5] =  Number of free blocks on the page +**   aResult[6] =  Page number of the left child of this entry +**   aResult[7] =  Page number of the right child for the whole page +** +** This routine is used for testing and debugging only. +*/ +static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){ +  int cnt, idx; +  MemPage *pPage = pCur->pPage; +  Btree *pBt = pCur->pBt; +  aResult[0] = sqlitepager_pagenumber(pPage); +  aResult[1] = pCur->idx; +  aResult[2] = pPage->nCell; +  if( pCur->idx>=0 && pCur->idx<pPage->nCell ){ +    aResult[3] = cellSize(pBt, pPage->apCell[pCur->idx]); +    aResult[6] = SWAB32(pBt, pPage->apCell[pCur->idx]->h.leftChild); +  }else{ +    aResult[3] = 0; +    aResult[6] = 0; +  } +  aResult[4] = pPage->nFree; +  cnt = 0; +  idx = SWAB16(pBt, pPage->u.hdr.firstFree); +  while( idx>0 && idx<STQLITE_USABLE_SIZE ){ +    cnt++; +    idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext); +  } +  aResult[5] = cnt; +  aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild); +  return STQLITE_OK; +} +#endif + +/* +** Return the pager associated with a BTree.  This routine is used for +** testing and debugging only. +*/ +static Pager *fileBtreePager(Btree *pBt){ +  return pBt->pPager; +} + +/* +** This structure is passed around through all the sanity checking routines +** in order to keep track of some global state information. +*/ +typedef struct IntegrityCk IntegrityCk; +struct IntegrityCk { +  Btree *pBt;    /* The tree being checked out */ +  Pager *pPager; /* The associated pager.  Also accessible by pBt->pPager */ +  int nPage;     /* Number of pages in the database */ +  int *anRef;    /* Number of times each page is referenced */ +  char *zErrMsg; /* An error message.  NULL of no errors seen. */ +}; + +/* +** Append a message to the error message string. +*/ +static void checkAppendMsg(IntegrityCk *pCheck, char *zMsg1, char *zMsg2){ +  if( pCheck->zErrMsg ){ +    char *zOld = pCheck->zErrMsg; +    pCheck->zErrMsg = 0; +    sqliteSetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0); +    sqliteFree(zOld); +  }else{ +    sqliteSetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0); +  } +} + +/* +** Add 1 to the reference count for page iPage.  If this is the second +** reference to the page, add an error message to pCheck->zErrMsg. +** Return 1 if there are 2 ore more references to the page and 0 if +** if this is the first reference to the page. +** +** Also check that the page number is in bounds. +*/ +static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){ +  if( iPage==0 ) return 1; +  if( iPage>pCheck->nPage || iPage<0 ){ +    char zBuf[100]; +    sprintf(zBuf, "invalid page number %d", iPage); +    checkAppendMsg(pCheck, zContext, zBuf); +    return 1; +  } +  if( pCheck->anRef[iPage]==1 ){ +    char zBuf[100]; +    sprintf(zBuf, "2nd reference to page %d", iPage); +    checkAppendMsg(pCheck, zContext, zBuf); +    return 1; +  } +  return  (pCheck->anRef[iPage]++)>1; +} + +/* +** Check the integrity of the freelist or of an overflow page list. +** Verify that the number of pages on the list is N. +*/ +static void checkList( +  IntegrityCk *pCheck,  /* Integrity checking context */ +  int isFreeList,       /* True for a freelist.  False for overflow page list */ +  int iPage,            /* Page number for first page in the list */ +  int N,                /* Expected number of pages in the list */ +  char *zContext        /* Context for error messages */ +){ +  int i; +  char zMsg[100]; +  while( N-- > 0 ){ +    OverflowPage *pOvfl; +    if( iPage<1 ){ +      sprintf(zMsg, "%d pages missing from overflow list", N+1); +      checkAppendMsg(pCheck, zContext, zMsg); +      break; +    } +    if( checkRef(pCheck, iPage, zContext) ) break; +    if( sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){ +      sprintf(zMsg, "failed to get page %d", iPage); +      checkAppendMsg(pCheck, zContext, zMsg); +      break; +    } +    if( isFreeList ){ +      FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload; +      int n = SWAB32(pCheck->pBt, pInfo->nFree); +      for(i=0; i<n; i++){ +        checkRef(pCheck, SWAB32(pCheck->pBt, pInfo->aFree[i]), zContext); +      } +      N -= n; +    } +    iPage = SWAB32(pCheck->pBt, pOvfl->iNext); +    sqlitepager_unref(pOvfl); +  } +} + +/* +** Return negative if zKey1<zKey2. +** Return zero if zKey1==zKey2. +** Return positive if zKey1>zKey2. +*/ +static int keyCompare( +  const char *zKey1, int nKey1, +  const char *zKey2, int nKey2 +){ +  int min = nKey1>nKey2 ? nKey2 : nKey1; +  int c = memcmp(zKey1, zKey2, min); +  if( c==0 ){ +    c = nKey1 - nKey2; +  } +  return c; +} + +/* +** Do various sanity checks on a single page of a tree.  Return +** the tree depth.  Root pages return 0.  Parents of root pages +** return 1, and so forth. +**  +** These checks are done: +** +**      1.  Make sure that cells and freeblocks do not overlap +**          but combine to completely cover the page. +**      2.  Make sure cell keys are in order. +**      3.  Make sure no key is less than or equal to zLowerBound. +**      4.  Make sure no key is greater than or equal to zUpperBound. +**      5.  Check the integrity of overflow pages. +**      6.  Recursively call checkTreePage on all tqchildren. +**      7.  Verify that the depth of all tqchildren is the same. +**      8.  Make sure this page is at least 33% full or else it is +**          the root of the tree. +*/ +static int checkTreePage( +  IntegrityCk *pCheck,  /* Context for the sanity check */ +  int iPage,            /* Page number of the page to check */ +  MemPage *pParent,     /* Parent page */ +  char *zParentContext, /* Parent context */ +  char *zLowerBound,    /* All keys should be greater than this, if not NULL */ +  int nLower,           /* Number of characters in zLowerBound */ +  char *zUpperBound,    /* All keys should be less than this, if not NULL */ +  int nUpper            /* Number of characters in zUpperBound */ +){ +  MemPage *pPage; +  int i, rc, depth, d2, pgno; +  char *zKey1, *zKey2; +  int nKey1, nKey2; +  BtCursor cur; +  Btree *pBt; +  char zMsg[100]; +  char zContext[100]; +  char hit[STQLITE_USABLE_SIZE]; + +  /* Check that the page exists +  */ +  cur.pBt = pBt = pCheck->pBt; +  if( iPage==0 ) return 0; +  if( checkRef(pCheck, iPage, zParentContext) ) return 0; +  sprintf(zContext, "On tree page %d: ", iPage); +  if( (rc = sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pPage))!=0 ){ +    sprintf(zMsg, "unable to get the page. error code=%d", rc); +    checkAppendMsg(pCheck, zContext, zMsg); +    return 0; +  } +  if( (rc = initPage(pBt, pPage, (Pgno)iPage, pParent))!=0 ){ +    sprintf(zMsg, "initPage() returns error code %d", rc); +    checkAppendMsg(pCheck, zContext, zMsg); +    sqlitepager_unref(pPage); +    return 0; +  } + +  /* Check out all the cells. +  */ +  depth = 0; +  if( zLowerBound ){ +    zKey1 = sqliteMalloc( nLower+1 ); +    memcpy(zKey1, zLowerBound, nLower); +    zKey1[nLower] = 0; +  }else{ +    zKey1 = 0; +  } +  nKey1 = nLower; +  cur.pPage = pPage; +  for(i=0; i<pPage->nCell; i++){ +    Cell *pCell = pPage->apCell[i]; +    int sz; + +    /* Check payload overflow pages +    */ +    nKey2 = NKEY(pBt, pCell->h); +    sz = nKey2 + NDATA(pBt, pCell->h); +    sprintf(zContext, "On page %d cell %d: ", iPage, i); +    if( sz>MX_LOCAL_PAYLOAD ){ +      int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE; +      checkList(pCheck, 0, SWAB32(pBt, pCell->ovfl), nPage, zContext); +    } + +    /* Check that keys are in the right order +    */ +    cur.idx = i; +    zKey2 = sqliteMallocRaw( nKey2+1 ); +    getPayload(&cur, 0, nKey2, zKey2); +    if( zKey1 && keyCompare(zKey1, nKey1, zKey2, nKey2)>=0 ){ +      checkAppendMsg(pCheck, zContext, "Key is out of order"); +    } + +    /* Check sanity of left child page. +    */ +    pgno = SWAB32(pBt, pCell->h.leftChild); +    d2 = checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zKey2,nKey2); +    if( i>0 && d2!=depth ){ +      checkAppendMsg(pCheck, zContext, "Child page depth differs"); +    } +    depth = d2; +    sqliteFree(zKey1); +    zKey1 = zKey2; +    nKey1 = nKey2; +  } +  pgno = SWAB32(pBt, pPage->u.hdr.rightChild); +  sprintf(zContext, "On page %d at right child: ", iPage); +  checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper); +  sqliteFree(zKey1); +  +  /* Check for complete coverage of the page +  */ +  memset(hit, 0, sizeof(hit)); +  memset(hit, 1, sizeof(PageHdr)); +  for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i<STQLITE_USABLE_SIZE; ){ +    Cell *pCell = (Cell*)&pPage->u.aDisk[i]; +    int j; +    for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++; +    i = SWAB16(pBt, pCell->h.iNext); +  } +  for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i<STQLITE_USABLE_SIZE; ){ +    FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i]; +    int j; +    for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++; +    i = SWAB16(pBt,pFBlk->iNext); +  } +  for(i=0; i<STQLITE_USABLE_SIZE; i++){ +    if( hit[i]==0 ){ +      sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage); +      checkAppendMsg(pCheck, zMsg, 0); +      break; +    }else if( hit[i]>1 ){ +      sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage); +      checkAppendMsg(pCheck, zMsg, 0); +      break; +    } +  } + +  /* Check that free space is kept to a minimum +  */ +#if 0 +  if( pParent && pParent->nCell>2 && pPage->nFree>3*STQLITE_USABLE_SIZE/4 ){ +    sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree, +       STQLITE_USABLE_SIZE/3); +    checkAppendMsg(pCheck, zContext, zMsg); +  } +#endif + +  sqlitepager_unref(pPage); +  return depth; +} + +/* +** This routine does a complete check of the given BTree file.  aRoot[] is +** an array of pages numbers were each page number is the root page of +** a table.  nRoot is the number of entries in aRoot. +** +** If everything checks out, this routine returns NULL.  If something is +** amiss, an error message is written into memory obtained from malloc() +** and a pointer to that error message is returned.  The calling function +** is responsible for freeing the error message when it is done. +*/ +char *fileBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){ +  int i; +  int nRef; +  IntegrityCk sCheck; + +  nRef = *sqlitepager_stats(pBt->pPager); +  if( lockBtree(pBt)!=STQLITE_OK ){ +    return sqliteStrDup("Unable to acquire a read lock on the database"); +  } +  sCheck.pBt = pBt; +  sCheck.pPager = pBt->pPager; +  sCheck.nPage = sqlitepager_pagecount(sCheck.pPager); +  if( sCheck.nPage==0 ){ +    unlockBtreeIfUnused(pBt); +    return 0; +  } +  sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) ); +  sCheck.anRef[1] = 1; +  for(i=2; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; } +  sCheck.zErrMsg = 0; + +  /* Check the integrity of the freelist +  */ +  checkList(&sCheck, 1, SWAB32(pBt, pBt->page1->freeList), +            SWAB32(pBt, pBt->page1->nFree), "Main freelist: "); + +  /* Check all the tables. +  */ +  for(i=0; i<nRoot; i++){ +    if( aRoot[i]==0 ) continue; +    checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ", 0,0,0,0); +  } + +  /* Make sure every page in the file is referenced +  */ +  for(i=1; i<=sCheck.nPage; i++){ +    if( sCheck.anRef[i]==0 ){ +      char zBuf[100]; +      sprintf(zBuf, "Page %d is never used", i); +      checkAppendMsg(&sCheck, zBuf, 0); +    } +  } + +  /* Make sure this analysis did not leave any unref() pages +  */ +  unlockBtreeIfUnused(pBt); +  if( nRef != *sqlitepager_stats(pBt->pPager) ){ +    char zBuf[100]; +    sprintf(zBuf,  +      "Outstanding page count goes from %d to %d during this analysis", +      nRef, *sqlitepager_stats(pBt->pPager) +    ); +    checkAppendMsg(&sCheck, zBuf, 0); +  } + +  /* Clean  up and report errors. +  */ +  sqliteFree(sCheck.anRef); +  return sCheck.zErrMsg; +} + +/* +** Return the full pathname of the underlying database file. +*/ +static const char *fileBtreeGetFilename(Btree *pBt){ +  assert( pBt->pPager!=0 ); +  return sqlitepager_filename(pBt->pPager); +} + +/* +** Copy the complete content of pBtFrom into pBtTo.  A transaction +** must be active for both files. +** +** The size of file pBtFrom may be reduced by this operation. +** If anything goes wrong, the transaction on pBtFrom is rolled back. +*/ +static int fileBtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){ +  int rc = STQLITE_OK; +  Pgno i, nPage, nToPage; + +  if( !pBtTo->inTrans || !pBtFrom->inTrans ) return STQLITE_ERROR; +  if( pBtTo->needSwab!=pBtFrom->needSwab ) return STQLITE_ERROR; +  if( pBtTo->pCursor ) return STQLITE_BUSY; +  memcpy(pBtTo->page1, pBtFrom->page1, STQLITE_USABLE_SIZE); +  rc = sqlitepager_overwrite(pBtTo->pPager, 1, pBtFrom->page1); +  nToPage = sqlitepager_pagecount(pBtTo->pPager); +  nPage = sqlitepager_pagecount(pBtFrom->pPager); +  for(i=2; rc==STQLITE_OK && i<=nPage; i++){ +    void *pPage; +    rc = sqlitepager_get(pBtFrom->pPager, i, &pPage); +    if( rc ) break; +    rc = sqlitepager_overwrite(pBtTo->pPager, i, pPage); +    if( rc ) break; +    sqlitepager_unref(pPage); +  } +  for(i=nPage+1; rc==STQLITE_OK && i<=nToPage; i++){ +    void *pPage; +    rc = sqlitepager_get(pBtTo->pPager, i, &pPage); +    if( rc ) break; +    rc = sqlitepager_write(pPage); +    sqlitepager_unref(pPage); +    sqlitepager_dont_write(pBtTo->pPager, i); +  } +  if( !rc && nPage<nToPage ){ +    rc = sqlitepager_truncate(pBtTo->pPager, nPage); +  } +  if( rc ){ +    fileBtreeRollback(pBtTo); +  } +  return rc;   +} + +/* +** The following tables contain pointers to all of the interface +** routines for this implementation of the B*Tree backend.  To +** substitute a different implemention of the backend, one has merely +** to provide pointers to alternative functions in similar tables. +*/ +static BtOps sqliteBtreeOps = { +    fileBtreeClose, +    fileBtreeSetCacheSize, +    fileBtreeSetSafetyLevel, +    fileBtreeBeginTrans, +    fileBtreeCommit, +    fileBtreeRollback, +    fileBtreeBeginCkpt, +    fileBtreeCommitCkpt, +    fileBtreeRollbackCkpt, +    fileBtreeCreateTable, +    fileBtreeCreateTable,  /* Really sqliteBtreeCreateIndex() */ +    fileBtreeDropTable, +    fileBtreeClearTable, +    fileBtreeCursor, +    fileBtreeGetMeta, +    fileBtreeUpdateMeta, +    fileBtreeIntegrityCheck, +    fileBtreeGetFilename, +    fileBtreeCopyFile, +    fileBtreePager, +#ifdef STQLITE_TEST +    fileBtreePageDump, +#endif +}; +static BtCursorOps sqliteBtreeCursorOps = { +    fileBtreeMoveto, +    fileBtreeDelete, +    fileBtreeInsert, +    fileBtreeFirst, +    fileBtreeLast, +    fileBtreeNext, +    fileBtreePrevious, +    fileBtreeKeySize, +    fileBtreeKey, +    fileBtreeKeyCompare, +    fileBtreeDataSize, +    fileBtreeData, +    fileBtreeCloseCursor, +#ifdef STQLITE_TEST +    fileBtreeCursorDump, +#endif +};  | 
