/*
 * Copyright (C) 2004, 2005  Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 1999-2002  Internet Software Consortium.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

18
/* $Id: journal.c,v 1.93 2005/06/08 02:06:58 marka Exp $ */
David Lawrence's avatar
David Lawrence committed
19

Andreas Gustafsson's avatar
Andreas Gustafsson committed
20 21 22
#include <config.h>

#include <stdlib.h>
23
#include <unistd.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
24

25
#include <isc/file.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
26
#include <isc/mem.h>
27
#include <isc/stdio.h>
28
#include <isc/string.h>
Bob Halley's avatar
Bob Halley committed
29
#include <isc/util.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
30

31
#include <dns/compress.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
32 33
#include <dns/db.h>
#include <dns/dbiterator.h>
34
#include <dns/diff.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
35
#include <dns/fixedname.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
36 37
#include <dns/journal.h>
#include <dns/log.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
38 39
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
40
#include <dns/result.h>
41
#include <dns/soa.h>
Andreas Gustafsson's avatar
Andreas Gustafsson committed
42

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
/*! \file 
 * \brief Journalling.
 *
 * A journal file consists of
 *
 *   \li A fixed-size header of type journal_rawheader_t.
 *
 *   \li The index.  This is an unordered array of index entries
 *     of type journal_rawpos_t giving the locations
 *     of some arbitrary subset of the journal's addressable
 *     transactions.  The index entries are used as hints to
 *     speed up the process of locating a transaction with a given
 *     serial number.  Unused index entries have an "offset"
 *     field of zero.  The size of the index can vary between
 *     journal files, but does not change during the lifetime
 *     of a file.  The size can be zero.
 *
 *   \li The journal data.  This  consists of one or more transactions.
 *     Each transaction begins with a transaction header of type
 *     journal_rawxhdr_t.  The transaction header is followed by a
 *     sequence of RRs, similar in structure to an IXFR difference
 *     sequence (RFC1995).  That is, the pre-transaction SOA,
 *     zero or more other deleted RRs, the post-transaction SOA,
 *     and zero or more other added RRs.  Unlike in IXFR, each RR
 *     is prefixed with a 32-bit length.
 *
 *     The journal data part grows as new transactions are
 *     appended to the file.  Only those transactions
 *     whose serial number is current-(2^31-1) to current
 *     are considered "addressable" and may be pointed
 *     to from the header or index.  They may be preceded
 *     by old transactions that are no longer addressable,
 *     and they may be followed by transactions that were
 *     appended to the journal but never committed by updating
 *     the "end" position in the header.  The latter will
 *     be overwritten when new transactions are added.
 */
/*%
 * BIND 8 compatibility switch.  While this is true, an IXFR difference
 * sequence whose SOA serial number does not change is accepted
 * (BIND 8 is known to send such sequences).
 */
static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */

/**************************************************************************/
/*
 * Miscellaneous utilities.
 */

Andreas Gustafsson's avatar
Andreas Gustafsson committed
92 93 94 95 96 97
/*%
 * Leading arguments shared by every isc_log_write() call in this file:
 * the DNS log context, the "general" category and the journal module.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*%
 * The common log arguments followed by a debug level of 'n'.
 */
#define JOURNAL_DEBUG_LOGARGS(n) \
	JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

98
/*%
 * Store 'code' in the local variable 'result' and jump to the local
 * label 'failure' unless it is ISC_R_SUCCESS.
 *
 * Using FAIL() with ISC_R_SUCCESS would be nonsensical (or at least
 * obtuse); the comparison only exists to stop the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAIL(code) \
	do { \
		result = (code); \
		if (result != ISC_R_SUCCESS) \
			goto failure; \
	} while (0)

/*%
 * Evaluate 'op', store its value in the local variable 'result' and
 * jump to the local label 'failure' if it is not ISC_R_SUCCESS.
 */
#define CHECK(op) \
	do { \
		result = (op); \
		if (result != ISC_R_SUCCESS) \
			goto failure; \
	} while (0)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
112

113 114
static isc_result_t index_to_disk(dns_journal_t *);

115
static inline isc_uint32_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
116 117 118 119 120 121 122
decode_uint32(unsigned char *p) {
	return ((p[0] << 24) +
		(p[1] << 16) +
		(p[2] <<  8) +
		(p[3] <<  0));
}

123
static inline void
Andreas Gustafsson's avatar
Andreas Gustafsson committed
124
encode_uint32(isc_uint32_t val, unsigned char *p) {
125 126 127 128
	p[0] = (isc_uint8_t)(val >> 24);
	p[1] = (isc_uint8_t)(val >> 16);
	p[2] = (isc_uint8_t)(val >>  8);
	p[3] = (isc_uint8_t)(val >>  0);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
129 130
}

131
isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
132 133 134
dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
		      dns_diffop_t op, dns_difftuple_t **tp)
{
135
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
136 137
	dns_dbnode_t *node;
	dns_rdataset_t rdataset;
138
	dns_rdata_t rdata = DNS_RDATA_INIT;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
139
	dns_name_t *zonename;
140

Andreas Gustafsson's avatar
Andreas Gustafsson committed
141
	zonename = dns_db_origin(db);
142

Andreas Gustafsson's avatar
Andreas Gustafsson committed
143 144
	node = NULL;
	result = dns_db_findnode(db, zonename, ISC_FALSE, &node);
145
	if (result != ISC_R_SUCCESS)
146
		goto nonode;
147

Andreas Gustafsson's avatar
Andreas Gustafsson committed
148
	dns_rdataset_init(&rdataset);
Bob Halley's avatar
Bob Halley committed
149
	result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
David Lawrence's avatar
David Lawrence committed
150
				     (isc_stdtime_t)0, &rdataset, NULL);
151
 	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
152
		goto freenode;
153

Andreas Gustafsson's avatar
Andreas Gustafsson committed
154
	result = dns_rdataset_first(&rdataset);
155
 	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
156 157 158 159 160 161 162 163
		goto freenode;

	dns_rdataset_current(&rdataset, &rdata);

	result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl,
				      &rdata, tp);

	dns_rdataset_disassociate(&rdataset);
164
	dns_db_detachnode(db, &node);
165 166
	return (ISC_R_SUCCESS);

Andreas Gustafsson's avatar
Andreas Gustafsson committed
167 168
 freenode:
	dns_db_detachnode(db, &node);
169 170
 nonode:
	UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
Andreas Gustafsson's avatar
Andreas Gustafsson committed
171 172 173
	return (result);
}

174
/* Journalling */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
175

176
/*%
 * On-disk form of a "pointer" to a journal entry.
 *
 * The journal header uses these to mark the beginning and end of the
 * journal, and the journal index uses them to locate other
 * transactions.
 */
typedef struct {
	unsigned char	serial[4];  /*%< SOA serial before update. */
	/*
	 * XXXRTH  Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char	offset[4];  /*%< Offset from beginning of file. */
} journal_rawpos_t;


195
/*%
 * Fixed size of the on-disk journal header.  It is larger than the
 * fields currently defined, leaving spare room for future extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

201 202 203 204
/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
205 206
typedef union {
	struct {
207
		/*% File format version ID. */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
208
		unsigned char 		format[16];
209
		/*% Position of the first addressable transaction */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
210
		journal_rawpos_t 	begin;
211
		/*% Position of the next (yet nonexistent) transaction. */
212
		journal_rawpos_t 	end;
213
		/*% Number of index entries following the header. */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
214 215 216 217 218 219
		unsigned char 		index_size[4];
	} h;
	/* Pad the header to a fixed size. */
	unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;

220
/*%
 * On-disk form of the transaction header.
 * Each transaction starts with one of these.
 */
typedef struct {
	unsigned char	size[4]; 	/*%< In bytes, excluding header. */
	unsigned char	serial0[4];	/*%< SOA serial before update. */
	unsigned char	serial1[4];	/*%< SOA serial after update. */
} journal_rawxhdr_t;

230
/*%
 * On-disk form of the RR header.
 * Each RR starts with one of these.
 */
typedef struct {
	unsigned char	size[4]; 	/*%< In bytes, excluding header. */
} journal_rawrrhdr_t;

238
/*%
239 240
 * The in-core representation of the journal header.
 */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
241 242
typedef struct {
	isc_uint32_t	serial;
243
	isc_offset_t	offset;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
244 245 246 247 248 249 250 251 252 253 254 255
} journal_pos_t;

/*% A position is considered valid when its offset is nonzero. */
#define POS_VALID(pos) 		((pos).offset != 0)
/*% Mark a position as invalid by zeroing both its offset and serial. */
#define POS_INVALIDATE(pos) 	((pos).offset = 0, (pos).serial = 0)

/*%
 * In-core form of the journal header; the decoded counterpart of
 * journal_rawheader_t (see journal_header_decode()).
 */
typedef struct {
	unsigned char 	format[16];	/* File format identification. */
	journal_pos_t 	begin;		/* Decoded 'begin' position. */
	journal_pos_t 	end;		/* Decoded 'end' position. */
	isc_uint32_t	index_size;	/* Decoded number of index entries. */
} journal_header_t;

256
/*%
257 258
 * The in-core representation of the transaction header.
 */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
259 260 261

typedef struct {
	isc_uint32_t	size;
262 263
	isc_uint32_t	serial0;
	isc_uint32_t	serial1;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
264 265
} journal_xhdr_t;

266
/*%
267 268
 * The in-core representation of the RR header.
 */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
269 270 271 272 273
typedef struct {
	isc_uint32_t	size;
} journal_rrhdr_t;


274
/*%
Andreas Gustafsson's avatar
Andreas Gustafsson committed
275 276 277 278 279
 * Initial contents to store in the header of a newly created
 * journal file.
 *
 * The header starts with the magic string ";BIND LOG V9\n"
 * to identify the file as a BIND 9 journal file.  An ASCII
280
 * identification string is used rather than a binary magic
Andreas Gustafsson's avatar
Andreas Gustafsson committed
281 282 283
 * number to be consistent with BIND 8 (BIND 8 journal files
 * are ASCII text files).
 */
284

Andreas Gustafsson's avatar
Andreas Gustafsson committed
285 286 287 288 289 290 291 292 293
static journal_header_t
initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 };

/*% True when the begin and end offsets of header 'h' coincide. */
#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)

/*%
 * State of an open journal.  journal_open() starts in the INVALID
 * state and moves to READ or WRITE once the journal has been opened
 * successfully, depending on the requested access mode.
 */
typedef enum {
	JOURNAL_STATE_INVALID,
	JOURNAL_STATE_READ,
	JOURNAL_STATE_WRITE,
	/* NOTE(review): presumably a write transaction in progress. */
	JOURNAL_STATE_TRANSACTION
} journal_state_t;

struct dns_journal {
298 299
	unsigned int		magic;		/*%< JOUR */
	isc_mem_t		*mctx;		/*%< Memory context */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
300
	journal_state_t		state;
301 302 303 304 305 306 307 308
	const char 		*filename;	/*%< Journal file name */
	FILE *			fp;		/*%< File handle */
	isc_offset_t		offset;		/*%< Current file offset */
	journal_header_t 	header;		/*%< In-core journal header */
	unsigned char		*rawindex;	/*%< In-core buffer for journal index in on-disk format */
	journal_pos_t		*index;		/*%< In-core journal index */

	/*% Current transaction state (when writing). */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
309
	struct {
310 311
		unsigned int	n_soa;		/*%< Number of SOAs seen */
		journal_pos_t	pos[2];		/*%< Begin/end position */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
312
	} x;
313

314
	/*% Iteration state (when reading). */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
315 316
	struct {
		/* These define the part of the journal we iterate over. */
317 318
		journal_pos_t bpos;		/*%< Position before first, */
		journal_pos_t epos;		/*%< and after last transaction */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
319
		/* The rest is iterator state. */
320 321 322 323 324 325 326 327 328 329
		isc_uint32_t current_serial;	/*%< Current SOA serial */
		isc_buffer_t source;		/*%< Data from disk */
		isc_buffer_t target;		/*%< Data from _fromwire check */
		dns_decompress_t dctx;		/*%< Dummy decompression ctx */
		dns_name_t name;		/*%< Current domain name */
		dns_rdata_t rdata;		/*%< Current rdata */
		isc_uint32_t ttl;		/*%< Current TTL */
		unsigned int xsize;		/*%< Size of transaction data */
		unsigned int xpos;		/*%< Current position in it */
		isc_result_t result;		/*%< Result of last call */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
330 331 332
	} it;
};

333
/*% Magic number ("JOUR") stored in a journal object, and its validity test. */
#define DNS_JOURNAL_MAGIC	ISC_MAGIC('J', 'O', 'U', 'R')
#define DNS_JOURNAL_VALID(t)	ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
335 336

static void
337
journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
Andreas Gustafsson's avatar
Andreas Gustafsson committed
338 339 340 341 342
	cooked->serial = decode_uint32(raw->serial);
	cooked->offset = decode_uint32(raw->offset);
}

static void
343
journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
Andreas Gustafsson's avatar
Andreas Gustafsson committed
344 345 346 347 348
	encode_uint32(cooked->serial, raw->serial);
	encode_uint32(cooked->offset, raw->offset);
}

static void
349
journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
Andreas Gustafsson's avatar
Andreas Gustafsson committed
350 351
	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
	memcpy(cooked->format, raw->h.format, sizeof(cooked->format));
352
	journal_pos_decode(&raw->h.begin, &cooked->begin);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
353 354 355 356 357
	journal_pos_decode(&raw->h.end, &cooked->end);
	cooked->index_size = decode_uint32(raw->h.index_size);
}

static void
358
journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
Andreas Gustafsson's avatar
Andreas Gustafsson committed
359 360 361
	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
	memset(raw->pad, 0, sizeof(raw->pad));
	memcpy(raw->h.format, cooked->format, sizeof(raw->h.format));
362
	journal_pos_encode(&raw->h.begin, &cooked->begin);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
363 364 365 366
	journal_pos_encode(&raw->h.end, &cooked->end);
	encode_uint32(cooked->index_size, raw->h.index_size);
}

367 368 369
/*
 * Journal file I/O subroutines, with error checking and reporting.
 */
370
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
371
journal_seek(dns_journal_t *j, isc_uint32_t offset) {
372
	isc_result_t result;
373
	result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET);
374
	if (result != ISC_R_SUCCESS) {
375
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
376 377
			      "%s: seek: %s", j->filename,
			      isc_result_totext(result));
378
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
379 380
	}
	j->offset = offset;
381
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
382 383
}

384
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
385
journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
386
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
387

388
	result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
389 390
	if (result != ISC_R_SUCCESS) {
		if (result == ISC_R_EOF)
391
			return (ISC_R_NOMORE);
392
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
393 394
			      "%s: read: %s",
			      j->filename, isc_result_totext(result));
395
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
396 397
	}
	j->offset += nbytes;
398
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
399 400
}

401
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
402
journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
403
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
404

405
	result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
406
	if (result != ISC_R_SUCCESS) {
407
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
408 409
			      "%s: write: %s",
			      j->filename, isc_result_totext(result));
410
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
411
	}
412
	j->offset += nbytes;
413
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
414 415
}

416
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
417
journal_fsync(dns_journal_t *j) {
418
	isc_result_t result;
419
	result = isc_stdio_flush(j->fp);
420
	if (result != ISC_R_SUCCESS) {
421
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
422 423
			      "%s: flush: %s",
			      j->filename, isc_result_totext(result));
424
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
425
	}
426
	result = isc_stdio_sync(j->fp);
427
	if (result != ISC_R_SUCCESS) {
428
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
429 430
			      "%s: fsync: %s",
			      j->filename, isc_result_totext(result));
431
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
432
	}
433
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
434 435
}

436 437 438
/*
 * Read/write a transaction header at the current file position.
 */
439

440
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
441 442
journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
	journal_rawxhdr_t raw;
443
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
444
	result = journal_read(j, &raw, sizeof(raw));
445
	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
446 447 448 449
		return (result);
	xhdr->size = decode_uint32(raw.size);
	xhdr->serial0 = decode_uint32(raw.serial0);
	xhdr->serial1 = decode_uint32(raw.serial1);
450
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
451 452
}

453
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
454 455 456 457 458 459 460 461 462 463 464
journal_write_xhdr(dns_journal_t *j, isc_uint32_t size,
		   isc_uint32_t serial0, isc_uint32_t serial1)
{
	journal_rawxhdr_t raw;
	encode_uint32(size, raw.size);
	encode_uint32(serial0, raw.serial0);
	encode_uint32(serial1, raw.serial1);
	return (journal_write(j, &raw, sizeof(raw)));
}


465 466 467
/*
 * Read an RR header at the current file position.
 */
468

469
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
470 471
journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
	journal_rawrrhdr_t raw;
472
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
473
	result = journal_read(j, &raw, sizeof(raw));
474
	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
475 476
		return (result);
	rrhdr->size = decode_uint32(raw.size);
477
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
478 479
}

480
static isc_result_t
481
journal_file_create(isc_mem_t *mctx, const char *filename) {
482 483
	FILE *fp = NULL;
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
484 485 486 487 488 489 490
	journal_header_t header;
	journal_rawheader_t rawheader;
	int index_size = 56; /* XXX configurable */
	int size;
	void *mem; /* Memory for temporary index image. */

	INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
491

492
	result = isc_stdio_open(filename, "wb", &fp);
493
	if (result != ISC_R_SUCCESS) {
494
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
495 496
			      "%s: create: %s",
			      filename, isc_result_totext(result));
497
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
498 499 500 501 502 503 504 505 506 507 508
	}

	header = initial_journal_header;
	header.index_size = index_size;
	journal_header_encode(&header, &rawheader);

	size = sizeof(journal_rawheader_t) +
		index_size * sizeof(journal_rawpos_t);

	mem = isc_mem_get(mctx, size);
	if (mem == NULL) {
509
		(void)isc_stdio_close(fp);
510
		(void)isc_file_remove(filename);
511
		return (ISC_R_NOMEMORY);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
512 513 514 515
	}
	memset(mem, 0, size);
	memcpy(mem, &rawheader, sizeof(rawheader));

516
	result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL);
517
	if (result != ISC_R_SUCCESS) {
518
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
Andreas Gustafsson's avatar
Andreas Gustafsson committed
519
				 "%s: write: %s",
520
				 filename, isc_result_totext(result));
521
		(void)isc_stdio_close(fp);
522 523
		(void)isc_file_remove(filename);
		isc_mem_put(mctx, mem, size);
524
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
525
	}
526 527
	isc_mem_put(mctx, mem, size);

528
	result = isc_stdio_close(fp);
529
	if (result != ISC_R_SUCCESS) {
David Lawrence's avatar
David Lawrence committed
530
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
Andreas Gustafsson's avatar
Andreas Gustafsson committed
531
				 "%s: close: %s",
532 533
				 filename, isc_result_totext(result));
		(void)isc_file_remove(filename);
534
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
535 536
	}

537
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
538
}
539

Mark Andrews's avatar
Mark Andrews committed
540 541 542
/*%
 * Open the journal file 'filename' and return a new journal object
 * through '*journalp'.
 *
 * 'filename' is stored by pointer, not copied, so it must remain valid
 * for as long as the journal object is in use.
 *
 * The file is opened read/write if 'write' is true and read-only
 * otherwise.  If the file does not exist it is created when 'create'
 * is true; otherwise ISC_R_NOTFOUND is returned.
 *
 * The header is read and checked against the expected format ID, and
 * the index, if any, is loaded and decoded.
 *
 * Requires:
 *	'journalp' is non-NULL and '*journalp' is NULL.
 *
 * Returns:
 *	ISC_R_SUCCESS
 *	ISC_R_NOMEMORY
 *	ISC_R_NOTFOUND		the file does not exist and 'create' is false
 *	ISC_R_UNEXPECTED	the file cannot be opened or has an
 *				unrecognized format
 *	Other results from creating, seeking in, or reading the file.
 *
 * On failure everything allocated here is released and '*journalp' is
 * left unchanged.
 */
static isc_result_t
journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write,
	     isc_boolean_t create, dns_journal_t **journalp) {
	FILE *fp = NULL;
	isc_result_t result;
	journal_rawheader_t rawheader;
	dns_journal_t *j;

	INSIST(journalp != NULL && *journalp == NULL);
	j = isc_mem_get(mctx, sizeof(*j));
	if (j == NULL)
		return (ISC_R_NOMEMORY);

	j->mctx = mctx;
	j->state = JOURNAL_STATE_INVALID;
	j->fp = NULL;
	j->filename = filename;
	j->index = NULL;
	j->rawindex = NULL;

	result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp);

	if (result == ISC_R_FILENOTFOUND) {
		if (create) {
			isc_log_write(JOURNAL_COMMON_LOGARGS,
				      ISC_LOG_INFO,
				      "journal file %s does not exist, "
				      "creating it",
				      j->filename);
			CHECK(journal_file_create(mctx, filename));
			/*
			 * Retry.
			 */
			result = isc_stdio_open(j->filename, "rb+", &fp);
		} else {
			FAIL(ISC_R_NOTFOUND);
		}
	}
	if (result != ISC_R_SUCCESS) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: open: %s",
			      j->filename, isc_result_totext(result));
		FAIL(ISC_R_UNEXPECTED);
	}

	j->fp = fp;

	/*
	 * Set magic early so that seek/read can succeed.
	 */
	j->magic = DNS_JOURNAL_MAGIC;

	CHECK(journal_seek(j, 0));
	CHECK(journal_read(j, &rawheader, sizeof(rawheader)));

	if (memcmp(rawheader.h.format, initial_journal_header.format,
		   sizeof(initial_journal_header.format)) != 0) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
				 "%s: journal format not recognized",
				 j->filename);
		FAIL(ISC_R_UNEXPECTED);
	}
	journal_header_decode(&rawheader, &j->header);

	/*
	 * If there is an index, read the raw index into a dynamically
	 * allocated buffer and then convert it into a cooked index.
	 */
	if (j->header.index_size != 0) {
		unsigned int i;
		unsigned int rawbytes;
		unsigned char *p;

		rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
		j->rawindex = isc_mem_get(mctx, rawbytes);
		if (j->rawindex == NULL)
			FAIL(ISC_R_NOMEMORY);

		CHECK(journal_read(j, j->rawindex, rawbytes));

		j->index = isc_mem_get(mctx, j->header.index_size *
				       sizeof(journal_pos_t));
		if (j->index == NULL)
			FAIL(ISC_R_NOMEMORY);

		p = j->rawindex;
		for (i = 0; i < j->header.index_size; i++) {
			j->index[i].serial = decode_uint32(p);
			p += 4;
			j->index[i].offset = decode_uint32(p);
			p += 4;
		}
		INSIST(p == j->rawindex + rawbytes);
	}
	j->offset = -1; /* Invalid, must seek explicitly. */

	/*
	 * Initialize the iterator.
	 */
	dns_name_init(&j->it.name, NULL);
	dns_rdata_init(&j->it.rdata);

	/*
	 * Set up empty initial buffers for unchecked and checked
	 * wire format RR data.  They will be reallocated
	 * later.
	 */
	isc_buffer_init(&j->it.source, NULL, 0);
	isc_buffer_init(&j->it.target, NULL, 0);
	dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);

	j->state =
		write ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;

	*journalp = j;
	return (ISC_R_SUCCESS);

 failure:
	j->magic = 0;
	/*
	 * Both index buffers are only ever allocated after the header
	 * has been decoded, so j->header.index_size is valid whenever
	 * either pointer is non-NULL.  Each buffer must be returned with
	 * the same size it was allocated with: raw entries for the raw
	 * index, cooked entries for the cooked index.
	 */
	if (j->rawindex != NULL) {
		isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
			    sizeof(journal_rawpos_t));
		j->rawindex = NULL;
	}
	if (j->index != NULL) {
		isc_mem_put(j->mctx, j->index, j->header.index_size *
			    sizeof(journal_pos_t));
		j->index = NULL;
	}
	if (j->fp != NULL)
		(void)isc_stdio_close(j->fp);
	isc_mem_put(j->mctx, j, sizeof(*j));
	return (result);
}

Mark Andrews's avatar
Mark Andrews committed
670 671 672 673 674 675
/*%
 * Open the journal file 'filename' through journal_open().  A missing
 * file is created only when the journal is opened for writing.
 */
isc_result_t
dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write,
		 dns_journal_t **journalp) {
	isc_boolean_t create = write;

	return (journal_open(mctx, filename, write, create, journalp));
}

Andreas Gustafsson's avatar
Andreas Gustafsson committed
676 677 678 679 680 681 682 683 684 685 686 687
/*
 * A comparison function defining the sorting order for
 * entries in the IXFR-style journal file.
 *
 * The IXFR format requires that deletions are sorted before
 * additions, and within either one, SOA records are sorted
 * before others.
 *
 * Also sort the non-SOA records by type as a courtesy to the
 * server receiving the IXFR - it may help reduce the amount of
 * rdataset merging it has to do.
 */
688
static int
689
ixfr_order(const void *av, const void *bv) {
Mark Andrews's avatar
Mark Andrews committed
690 691 692 693
	dns_difftuple_t const * const *ap = av;
	dns_difftuple_t const * const *bp = bv;
	dns_difftuple_t const *a = *ap;
	dns_difftuple_t const *b = *bp;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
694 695 696 697 698
	int r;

	r = (b->op == DNS_DIFFOP_DEL) - (a->op == DNS_DIFFOP_DEL);
	if (r != 0)
		return (r);
699

Andreas Gustafsson's avatar
Andreas Gustafsson committed
700 701 702 703 704 705 706 707 708 709 710
	r = (b->rdata.type == dns_rdatatype_soa) -
		(a->rdata.type == dns_rdatatype_soa);
	if (r != 0)
		return (r);

	r = (a->rdata.type - b->rdata.type);
	return (r);
}

/*
 * Advance '*pos' to the next journal transaction.
711
 *
Andreas Gustafsson's avatar
Andreas Gustafsson committed
712 713 714 715
 * Requires:
 *	*pos refers to a valid journal transaction.
 *
 * Ensures:
716
 *	When ISC_R_SUCCESS is returned,
Andreas Gustafsson's avatar
Andreas Gustafsson committed
717 718 719 720
 *	*pos refers to the next journal transaction.
 *
 * Returns one of:
 *
721
 *    ISC_R_SUCCESS
722
 *    ISC_R_NOMORE 	*pos pointed at the last transaction
Andreas Gustafsson's avatar
Andreas Gustafsson committed
723 724
 *    Other results due to file errors are possible.
 */
725
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
726
journal_next(dns_journal_t *j, journal_pos_t *pos) {
727
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
728
	journal_xhdr_t xhdr;
729
	REQUIRE(DNS_JOURNAL_VALID(j));
730

Andreas Gustafsson's avatar
Andreas Gustafsson committed
731
	result = journal_seek(j, pos->offset);
732
	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
733 734
		return (result);

Mark Andrews's avatar
Mark Andrews committed
735 736
	if (pos->serial == j->header.end.serial)
		return (ISC_R_NOMORE);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
737 738
	/*
	 * Read the header of the current transaction.
739
	 * This will return ISC_R_NOMORE if we are at EOF.
Andreas Gustafsson's avatar
Andreas Gustafsson committed
740 741
	 */
	result = journal_read_xhdr(j, &xhdr);
742
	if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
743 744
		return (result);

745 746 747
	/*
	 * Check serial number consistency.
	 */
Andreas Gustafsson's avatar
Andreas Gustafsson committed
748
	if (xhdr.serial0 != pos->serial) {
749
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
750 751 752
			      "%s: journal file corrupt: "
			      "expected serial %u, got %u",
			      j->filename, pos->serial, xhdr.serial0);
753
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
754 755
	}

756 757 758
	/*
	 * Check for offset wraparound.
	 */
759 760
	if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size)
	    < pos->offset) {
David Lawrence's avatar
David Lawrence committed
761
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
762
			      "%s: offset too large", j->filename);
763
		return (ISC_R_UNEXPECTED);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
764
	}
765

Andreas Gustafsson's avatar
Andreas Gustafsson committed
766 767
	pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size;
	pos->serial = xhdr.serial1;
768
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
}

/*
 * Search the in-memory index of journal 'j' for a starting point
 * "better" than '*best_guess', and overwrite '*best_guess' with it
 * if one is found.
 *
 * An index entry is "better" when its serial number is not greater
 * than 'serial' but is greater than the serial of the current best
 * guess, i.e. it is closer to 'serial' without overshooting it.
 *
 * Does nothing when the journal has no index.
 */
static void
index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) {
	journal_pos_t *slots = j->index;
	unsigned int n;

	if (slots == NULL)
		return;

	for (n = 0; n < j->header.index_size; n++) {
		/* Unused slots carry no information. */
		if (! POS_VALID(slots[n]))
			continue;

		if (DNS_SERIAL_GE(serial, slots[n].serial) &&
		    DNS_SERIAL_GT(slots[n].serial, best_guess->serial))
			*best_guess = slots[n];
	}
}

/*
 * Record '*pos' as a new entry in the index of journal 'j'.
 *
 * When the index is full, room is made by discarding the
 * odd-numbered entries and packing the even-numbered ones into the
 * first half of the index.  Repeating this over time thins out old
 * entries exponentially, so the index always holds a much larger
 * fraction of recent serial numbers than of old ones.  This is
 * deliberate - most index searches are for outgoing IXFR, and IXFR
 * tends to request recent versions more often than old ones.
 *
 * Does nothing when the journal has no index.
 */
static void
index_add(dns_journal_t *j, journal_pos_t *pos) {
	unsigned int i = 0;

	if (j->index == NULL)
		return;

	/*
	 * Walk forward to the first unused slot, if there is one.
	 */
	while (i < j->header.index_size && POS_VALID(j->index[i]))
		i++;

	if (i == j->header.index_size) {
		unsigned int src;
		unsigned int dst = 0;

		/*
		 * The index is full.  Keep every second entry, packed
		 * towards the front ...
		 */
		for (src = 0; src < j->header.index_size; src += 2)
			j->index[dst++] = j->index[src];

		/* ... the new entry goes right after the survivors ... */
		i = dst;

		/* ... and everything from there on becomes vacant. */
		for (; dst < j->header.index_size; dst++)
			POS_INVALIDATE(j->index[dst]);
	}

	INSIST(i < j->header.index_size);
	INSIST(! POS_VALID(j->index[i]));

	/*
	 * Fill the vacant slot with the new entry.
	 */
	j->index[i] = *pos;
}

/*
 * Invalidate any existing index entries that could become
 * ambiguous when a new transaction with number 'serial' is added.
 */
static void
840
index_invalidate(dns_journal_t *j, isc_uint32_t serial) {
Andreas Gustafsson's avatar
Andreas Gustafsson committed
841 842 843 844 845 846 847 848 849 850 851 852 853
	unsigned int i;
	if (j->index == NULL)
		return;
	for (i = 0; i < j->header.index_size; i++) {
		if (! DNS_SERIAL_GT(serial, j->index[i].serial))
			POS_INVALIDATE(j->index[i]);
	}
}

/*
 * Try to find a transaction with initial serial number 'serial'
 * in the journal 'j'.
 *
854
 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
Andreas Gustafsson's avatar
Andreas Gustafsson committed
855 856
 *
 * If 'serial' is current (= the ending serial number of the
857
 * last transaction in the journal), set '*pos' to
Andreas Gustafsson's avatar
Andreas Gustafsson committed
858
 * the position immediately following the last transaction and
859
 * return ISC_R_SUCCESS.
Andreas Gustafsson's avatar
Andreas Gustafsson committed
860
 *
861
 * If 'serial' is within the range of addressable serial numbers
Andreas Gustafsson's avatar
Andreas Gustafsson committed
862
 * covered by the journal but that particular serial number is missing
863
 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
Andreas Gustafsson's avatar
Andreas Gustafsson committed
864 865
 *
 * If 'serial' is outside the range of addressable serial numbers
David Lawrence's avatar
David Lawrence committed
866
 * covered by the journal, return ISC_R_RANGE.
867
 *
Andreas Gustafsson's avatar
Andreas Gustafsson committed
868
 */
869
static isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
870
journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) {
871
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
872
	journal_pos_t current_pos;
873
	REQUIRE(DNS_JOURNAL_VALID(j));
874

Andreas Gustafsson's avatar
Andreas Gustafsson committed
875
	if (DNS_SERIAL_GT(j->header.begin.serial, serial))
David Lawrence's avatar
David Lawrence committed
876
		return (ISC_R_RANGE);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
877
	if (DNS_SERIAL_GT(serial, j->header.end.serial))
David Lawrence's avatar
David Lawrence committed
878
		return (ISC_R_RANGE);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
879 880
	if (serial == j->header.end.serial) {
		*pos = j->header.end;
881
		return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
882 883 884 885
	}

	current_pos = j->header.begin;
	index_find(j, serial, &current_pos);
886

Andreas Gustafsson's avatar
Andreas Gustafsson committed
887 888
	while (current_pos.serial != serial) {
		if (DNS_SERIAL_GT(current_pos.serial, serial))
889
			return (ISC_R_NOTFOUND);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
890
		result = journal_next(j, &current_pos);
891
		if (result != ISC_R_SUCCESS)
Andreas Gustafsson's avatar
Andreas Gustafsson committed
892 893 894
			return (result);
	}
	*pos = current_pos;
895
	return (ISC_R_SUCCESS);
Andreas Gustafsson's avatar
Andreas Gustafsson committed
896 897
}

898
isc_result_t
Andreas Gustafsson's avatar
Andreas Gustafsson committed
899 900
dns_journal_begin_transaction(dns_journal_t *j) {
	isc_uint32_t offset;
901
	isc_result_t result;
Andreas Gustafsson's avatar
Andreas Gustafsson committed
902
	journal_rawxhdr_t hdr;
903

904
	REQUIRE(DNS_JOURNAL_VALID(j));
Andreas Gustafsson's avatar
Andreas Gustafsson committed
905
	REQUIRE(j->state == JOURNAL_STATE_WRITE);
906

Andreas Gustafsson's avatar
Andreas Gustafsson committed
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921
	/*
	 * Find the file offset where the new transaction should
	 * be written, and seek there.
	 */
	if (JOURNAL_EMPTY(&j->header)) {
		offset = sizeof(journal_rawheader_t) +
			j->header.index_size * sizeof(journal_rawpos_t);
	} else {
		offset = j->header.end.offset;
	}
	j->x.pos[0].offset = offset;
	j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
	j->x.n_soa = 0;

	CHECK(journal_seek(j, offset));
922

Andreas Gustafsson's avatar
Andreas Gustafsson committed
923 924 925 926 927 928 929 930 931 932
	/*