pax_global_header00006660000000000000000000000064131731570310014513gustar00rootroot0000000000000052 comment=3e56a632d174c4844923d419a3d9a5a9dbdb1101 pg_filedump-REL_10_0-c0e4028/000077500000000000000000000000001317315703100154745ustar00rootroot00000000000000pg_filedump-REL_10_0-c0e4028/.gitignore000066400000000000000000000000221317315703100174560ustar00rootroot00000000000000/*.o /pg_filedump pg_filedump-REL_10_0-c0e4028/Makefile000066400000000000000000000032771317315703100171450ustar00rootroot00000000000000# View README.pg_filedump first # note this must match version macros in pg_filedump.h FD_VERSION=10.0 # If working with a PG source directory, point PGSQL_INCLUDE_DIR to its # src/include subdirectory. If working with an installed tree, point to # the server include subdirectory, eg /usr/local/include/postgresql/server PG_CONFIG=pg_config PGSQL_CFLAGS=$(shell $(PG_CONFIG) --cflags) PGSQL_INCLUDE_DIR=$(shell $(PG_CONFIG) --includedir-server) PGSQL_LDFLAGS=$(shell $(PG_CONFIG) --ldflags) PGSQL_LIB_DIR=$(shell $(PG_CONFIG) --libdir) PGSQL_BIN_DIR=$(shell $(PG_CONFIG) --bindir) DISTFILES= README.pg_filedump Makefile Makefile.contrib \ pg_filedump.h pg_filedump.c decode.h decode.c stringinfo.c pg_lzcompress.c all: pg_filedump pg_filedump: pg_filedump.o decode.o stringinfo.o pg_lzcompress.o ${CC} ${PGSQL_LDFLAGS} ${LDFLAGS} -o pg_filedump pg_filedump.o decode.o stringinfo.o pg_lzcompress.o -L${PGSQL_LIB_DIR} -lpgport pg_filedump.o: pg_filedump.c ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} pg_filedump.c -c decode.o: decode.c ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} decode.c -c stringinfo.o: stringinfo.c ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} stringinfo.c -c pg_lzcompress.o: pg_lzcompress.c ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} pg_lzcompress.c -c dist: rm -rf pg_filedump-${FD_VERSION} pg_filedump-${FD_VERSION}.tar.gz mkdir pg_filedump-${FD_VERSION} cp -p ${DISTFILES} pg_filedump-${FD_VERSION} tar cfz 
pg_filedump-${FD_VERSION}.tar.gz pg_filedump-${FD_VERSION} rm -rf pg_filedump-${FD_VERSION} install: pg_filedump mkdir -p $(DESTDIR)$(PGSQL_BIN_DIR) install pg_filedump $(DESTDIR)$(PGSQL_BIN_DIR) clean: rm -f *.o pg_filedump pg_filedump-REL_10_0-c0e4028/Makefile.contrib000066400000000000000000000005011317315703100205670ustar00rootroot00000000000000PROGRAM = pg_filedump OBJS = pg_filedump.o DOCS = README.pg_filedump ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) else subdir = contrib/pg_filedump top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif pg_filedump-REL_10_0-c0e4028/README.pg_filedump000066400000000000000000000106151317315703100206510ustar00rootroot00000000000000pg_filedump - Display formatted contents of a PostgreSQL heap, index, or control file. Copyright (c) 2002-2010 Red Hat, Inc. Copyright (c) 2011-2017, PostgreSQL Global Development Group This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. Original Author: Patrick Macdonald ------------------------------------------------------------------------ Overview: pg_filedump is a utility to format PostgreSQL heap/index/control files into a human-readable form. You can format/dump the files several ways, as listed in the Invocation section, as well as dumping straight binary. The type of file (heap/index) can usually be determined automatically by the content of the blocks within the file. However, to format a pg_control file you must use the -c option. The default is to format the entire file using the block size listed in block 0 and display block relative addresses. These defaults can be modified using run-time options. Some options may seem strange but they're there for a reason. For example, block size. 
It's there because if the header of block 0 is corrupt, you need a method of forcing a block size. ------------------------------------------------------------------------ Compile/Installation: To compile pg_filedump, you will need to have a properly configured PostgreSQL source tree or complete install tree (with include files) of the appropriate PostgreSQL major version. There are two makefiles included in this package. Makefile is a standalone makefile for pg_filedump. Alter its PGSQL_INCLUDE_DIR variable to point to the PostgreSQL include files. Makefile.contrib can be used if this package was untarred in the contrib directory of a PostgreSQL build tree. make make install (if using Makefile.contrib) It is also possible to use Makefile.contrib without being in the contrib directory: make -f Makefile.contrib USE_PGXS=1 This method requires that the pg_config program be in your PATH, but should not require any manual adjustments of the Makefile. ------------------------------------------------------------------------ Invocation: pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file Defaults are: relative addressing, range of the entire file, block size as listed on block 0 in the file The following options are valid for heap and index files: -a Display absolute addresses when formatting (Block header information is always block relative) -b Display binary block images within a range (Option will turn off all formatting options) -d Display formatted block content dump (Option will turn off all other formatting options) -D Decode tuples using given comma separated list of types. 
List of supported types: * bigint * bigserial * bool * char * charN -- char(n) * date * float * float4 * float8 * int * json * macaddr * name * oid * real * serial * smallint * smallserial * text * time * timestamp * timetz * uuid * varchar * varcharN -- varchar(n) * xid * xml * ~ -- ignores all attributes left in a tuple -f Display formatted block content dump along with interpretation -h Display this information -i Display interpreted item details -k Verify block checksums -R Display specific block ranges within the file (Blocks are indexed from 0) [startblock]: block to start at [endblock]: block to end at A startblock without an endblock will format the single block -s Force segment size to [segsize] -n Force segment number to [segnumber] -S Force block size to [blocksize] -x Force interpreted formatting of block items as index items -y Force interpreted formatting of block items as heap items The following options are valid for control files: -c Interpret the file listed as a control file -f Display formatted content dump along with interpretation -S Force block size to [blocksize] In most cases it's recommended to use the -i and -f options to get the most useful dump output. 
pg_filedump-REL_10_0-c0e4028/decode.c000066400000000000000000000504061317315703100170700ustar00rootroot00000000000000#include "postgres.h" #include "decode.h" #include #include #include #include #include #include #include #include #include #define ATTRTYPES_STR_MAX_LEN (1024-1) typedef int (*decode_callback_t)(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_char(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_name(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int decode_ignore(const char* buffer, unsigned int buff_size, unsigned int* out_size); static int ncallbacks = 0; static decode_callback_t callbacks[ATTRTYPES_STR_MAX_LEN / 2] = { NULL }; typedef struct { char* 
name; decode_callback_t callback; } ParseCallbackTableItem; static ParseCallbackTableItem callback_table[] = { { "smallserial", &decode_smallint }, { "smallint", &decode_smallint }, { "int", &decode_int }, { "oid", &decode_int }, { "xid", &decode_int }, { "serial", &decode_int }, { "bigint", &decode_bigint }, { "bigserial", &decode_bigint }, { "time", &decode_time }, { "timetz", &decode_timetz }, { "date", &decode_date }, { "timestamp", &decode_timestamp }, { "real", &decode_float4 }, { "float4", &decode_float4 }, { "float8", &decode_float8 }, { "float", &decode_float8 }, { "bool", &decode_bool }, { "uuid", &decode_uuid }, { "macaddr", &decode_macaddr }, { "name", &decode_name }, { "char", &decode_char }, { "~", &decode_ignore }, /* internally all string types are stored the same way */ { "charN", &decode_string }, { "varchar", &decode_string }, { "varcharN", &decode_string }, { "text", &decode_string }, { "json", &decode_string }, { "xml", &decode_string }, { NULL, NULL}, }; static StringInfoData copyString; static bool copyStringInitDone = false; /* * Temporary buffer for storing decompressed data. * * 64K should be enough in most cases. If it's not user can manually change * this limit. Unfortunately there is no way to know how much memory user * is willing to allocate. */ static char decompress_tmp_buff[64*1024]; /* Used by some PostgreSQL macro definitions */ void ExceptionalCondition(const char *conditionName, const char *errorType, const char *fileName, int lineNumber) { printf("Exceptional condition: name = %s, type = %s, fname = %s, line = %d\n", conditionName ? conditionName : "(NULL)", errorType ? errorType : "(NULL)", fileName ? 
/*
 * Append given string to current COPY line and encode special symbols
 * like \0, \r, \n, \t and \\.
 *
 * Arguments:
 *   str      - bytes to encode; they are read by length, so embedded zero
 *              bytes are allowed and are emitted as the two characters \0
 *   orig_len - number of bytes of str to encode
 *
 * The encoded text is assembled in a fixed-size static buffer and then
 * passed to CopyAppend(). If the encoded form does not fit into that
 * buffer an error is printed and the program exits with status 1.
 */
static void
CopyAppendEncode(const char* str, int orig_len)
{
	/*
	 * Should be enough in most cases. If it's not user can manually change
	 * this limit. Unfortunately there is no way to know how much memory user
	 * is willing to allocate.
	 */
	static char tmp_buff[64*1024];
	/* Upper bound used by the free-space check inside the loop. */
	const int max_offset = sizeof(tmp_buff) - 2;
	int curr_offset = 0;
	int len = orig_len;

	while(len > 0)
	{
		/* Second character of the escape sequence, '\0' if none is needed. */
		char escape_code;

		/*
		 * Make sure there is enough free space for at least one special
		 * symbol and a trailing zero.
		 */
		if(curr_offset > max_offset - 2)
		{
			printf("ERROR: Unable to properly encode a string since it's too "
				"large (%d bytes). Try to increase tmp_buff size in CopyAppendEncode "
				"procedure.\n", orig_len);
			exit(1);
		}

		switch(*str)
		{
			case '\0':
				/*
				 * Since we are working with potentially corrupted data we
				 * can encounter \0 as well.
				 */
				escape_code = '0';
				break;
			case '\r':
				escape_code = 'r';
				break;
			case '\n':
				escape_code = 'n';
				break;
			case '\t':
				/* A tab must be written as \t; it used to be written as \r. */
				escape_code = 't';
				break;
			case '\\':
				escape_code = '\\';
				break;
			default:
				escape_code = '\0';
				break;
		}

		if(escape_code != '\0')
		{
			tmp_buff[curr_offset++] = '\\';
			tmp_buff[curr_offset++] = escape_code;
		}
		else
		{
			/* It's a regular symbol. */
			tmp_buff[curr_offset++] = *str;
		}

		str++;
		len--;
	}

	tmp_buff[curr_offset] = '\0';
	CopyAppend(tmp_buff);
}
attrtypes; while(curr_type) { next_type = strstr(curr_type, ","); if(next_type) { *next_type = '\0'; next_type++; } if(AddTypeCallback(curr_type) < 0) return -1; curr_type = next_type; } return 0; } /* * Convert Julian day number (JDN) to a date. * Copy-pasted from src/backend/utils/adt/datetime.c */ static void j2date(int jd, int *year, int *month, int *day) { unsigned int julian; unsigned int quad; unsigned int extra; int y; julian = jd; julian += 32044; quad = julian / 146097; extra = (julian - quad * 146097) * 4 + 3; julian += 60 + quad * 3 + extra / 146097; quad = julian / 1461; julian -= quad * 1461; y = julian * 4 / 1461; julian = ((y != 0) ? ((julian + 305) % 365) : ((julian + 306) % 366)) + 123; y += quad * 4; *year = y - 4800; quad = julian * 2141 / 65536; *day = julian - 7834 * quad / 256; *month = (quad + 10) % MONTHS_PER_YEAR + 1; } /* Decode a smallint type */ static int decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int16), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int16)) return -2; CopyAppendFmt("%d", (int)(*(int16*)buffer)); *out_size = sizeof(int16) + delta; return 0; } /* Decode an int type */ static int decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int32)) return -2; CopyAppendFmt("%d", *(int32*)buffer); *out_size = sizeof(int32) + delta; return 0; } /* Decode a bigint type */ static int decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const 
char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int64)) return -2; CopyAppendFmt("%ld", *(int64*)buffer); *out_size = sizeof(int64) + delta; return 0; } /* Decode a time type */ static int decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); int64 timestamp, timestamp_sec; if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int64)) return -2; timestamp = *(int64*)buffer; timestamp_sec = timestamp / 1000000; *out_size = sizeof(int64) + delta; CopyAppendFmt("%02ld:%02ld:%02ld.%06ld", timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, timestamp % 1000000); return 0; } /* Decode a timetz type */ static int decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); int64 timestamp, timestamp_sec; int32 tz_sec, tz_min; if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < (sizeof(int64) + sizeof(int32))) return -2; timestamp = *(int64*)buffer; tz_sec = *(int32*)(buffer + sizeof(int64)); timestamp_sec = timestamp / 1000000; tz_min = - (tz_sec / 60); *out_size = sizeof(int64) + sizeof(int32) + delta; CopyAppendFmt("%02ld:%02ld:%02ld.%06ld%c%02d:%02d", timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, timestamp % 1000000, (tz_min > 0 ? 
'+' : '-'), abs(tz_min / 60), abs(tz_min % 60)); return 0; } /* Decode a date type */ static int decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); int32 jd, year, month, day; if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int32)) return -2; *out_size = sizeof(int32) + delta; jd = *(int32*)buffer + POSTGRES_EPOCH_JDATE; j2date(jd, &year, &month, &day); CopyAppendFmt("%04d-%02d-%02d%s", (year <= 0) ? -year + 1 : year, month, day, (year <= 0) ? " BC" : ""); return 0; } /* Decode a timestamp type */ static int decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); int64 timestamp, timestamp_sec; int32 jd, year, month, day; if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(int64)) return -2; *out_size = sizeof(int64) + delta; timestamp = *(int64*)buffer; jd = timestamp / USECS_PER_DAY; if (jd != 0) timestamp -= jd * USECS_PER_DAY; if (timestamp < INT64CONST(0)) { timestamp += USECS_PER_DAY; jd -= 1; } /* add offset to go from J2000 back to standard Julian date */ jd += POSTGRES_EPOCH_JDATE; j2date(jd, &year, &month, &day); timestamp_sec = timestamp / 1000000; CopyAppendFmt("%04d-%02d-%02d %02ld:%02ld:%02ld.%06ld%s", (year <= 0) ? -year + 1 : year, month, day, timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, timestamp % 1000000, (year <= 0) ? 
" BC" : ""); return 0; } /* Decode a float4 type */ static int decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(float), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(float)) return -2; CopyAppendFmt("%.12f", *(float*)buffer); *out_size = sizeof(float) + delta; return 0; } /* Decode a float8 type */ static int decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(double), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(double)) return -2; CopyAppendFmt("%.12lf", *(double*)buffer); *out_size = sizeof(double) + delta; return 0; } /* Decode an uuid type */ static int decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size) { unsigned char uuid[16]; if(buff_size < sizeof(uuid)) return -1; memcpy(uuid, buffer, sizeof(uuid)); CopyAppendFmt("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15] ); *out_size = sizeof(uuid); return 0; } /* Decode a macaddr type */ static int decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size) { unsigned char macaddr[6]; const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < sizeof(macaddr)) return -2; memcpy(macaddr, buffer, sizeof(macaddr)); 
CopyAppendFmt("%02x:%02x:%02x:%02x:%02x:%02x", macaddr[0], macaddr[1], macaddr[2], macaddr[3], macaddr[4], macaddr[5] ); *out_size = sizeof(macaddr) + delta; return 0; } /* Decode a bool type */ static int decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size) { if(buff_size < sizeof(bool)) return -1; CopyAppend(*(bool*)buffer ? "t" : "f"); *out_size = sizeof(bool); return 0; } /* Decode a name type (used mostly in catalog tables) */ static int decode_name(const char* buffer, unsigned int buff_size, unsigned int* out_size) { const char* new_buffer = (const char*)TYPEALIGN(sizeof(uint32), (uintptr_t)buffer); unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); if(buff_size < delta) return -1; buff_size -= delta; buffer = new_buffer; if(buff_size < NAMEDATALEN) return -2; CopyAppendEncode(buffer, strnlen(buffer, NAMEDATALEN)); *out_size = NAMEDATALEN + delta; return 0; } /* Decode a char type */ static int decode_char(const char* buffer, unsigned int buff_size, unsigned int* out_size) { if(buff_size < sizeof(char)) return -2; CopyAppendEncode(buffer, 1); *out_size = 1; return 0; } /* Ignore all data left */ static int decode_ignore(const char* buffer, unsigned int buff_size, unsigned int* out_size) { *out_size = buff_size; return 0; } /* Decode char(N), varchar(N), text, json or xml types */ static int decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size) { int padding = 0; /* Skip padding bytes. 
*/ while(*buffer == 0x00) { if(buff_size == 0) return -1; buff_size--; buffer++; padding++; } if(VARATT_IS_1B_E(buffer)) { /* * 00000001 1-byte length word, unaligned, TOAST pointer */ uint8 tag = VARTAG_1B_E(buffer); uint32 len = VARTAG_SIZE(tag); if(len > buff_size) return -1; CopyAppend("(TOASTED)"); *out_size = padding + len; return 0; } if(VARATT_IS_1B(buffer)) { /* * xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to 126b) * xxxxxxx is 1 + string length */ uint8 len = VARSIZE_1B(buffer); if(len > buff_size) return -1; CopyAppendEncode(buffer + 1, len - 1); *out_size = padding + len; return 0; } if(VARATT_IS_4B_U(buffer) && buff_size >= 4) { /* * xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G) */ uint32 len = VARSIZE_4B(buffer); if(len > buff_size) return -1; CopyAppendEncode(buffer + 4, len - 4); *out_size = padding + len; return 0; } if(VARATT_IS_4B_C(buffer) && buff_size >= 8) { /* * xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G) */ int decompress_ret; uint32 len = VARSIZE_4B(buffer); uint32 decompressed_len = VARRAWSIZE_4B_C(buffer); if(len > buff_size) return -1; if(decompressed_len > sizeof(decompress_tmp_buff)) { printf("WARNING: Unable to decompress a string since it's too " "large (%d bytes after decompressing). Consider increasing " "decompress_tmp_buff size.\n", decompressed_len); CopyAppend("(COMPRESSED)"); *out_size = padding + len; return 0; } decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2*sizeof(uint32), decompress_tmp_buff, decompressed_len); if((decompress_ret != decompressed_len) || (decompress_ret < 0)) { printf("WARNING: Unable to decompress a string. Data is corrupted.\n"); CopyAppend("(COMPRESSED)"); *out_size = padding + len; return 0; } CopyAppendEncode(decompress_tmp_buff, decompressed_len); *out_size = padding + len; return 0; } return -9; } /* * Try to decode a tuple using a types string provided previously. 
* * Arguments: * tupleData - pointer to the tuple data * tupleSize - tuple size in bytes */ void FormatDecode(const char* tupleData, unsigned int tupleSize) { HeapTupleHeader header = (HeapTupleHeader)tupleData; const char* data = tupleData + header->t_hoff; unsigned int size = tupleSize - header->t_hoff; int curr_attr; CopyClear(); for(curr_attr = 0; curr_attr < ncallbacks; curr_attr++) { int ret; unsigned int processed_size = 0; if( (header->t_infomask & HEAP_HASNULL) && att_isnull(curr_attr, header->t_bits) ) { CopyAppend("\\N"); continue; } if(size <= 0) { printf("Error: unable to decode a tuple, no more bytes left. Partial data: %s\n", copyString.data); return; } ret = callbacks[curr_attr](data, size, &processed_size); if(ret < 0) { printf("Error: unable to decode a tuple, callback #%d returned %d. Partial data: %s\n", curr_attr+1, ret, copyString.data); return; } size -= processed_size; data += processed_size; } if(size != 0) { printf("Error: unable to decode a tuple, %d bytes left, 0 expected. Partial data: %s\n", size, copyString.data); return; } CopyFlush(); } pg_filedump-REL_10_0-c0e4028/decode.h000066400000000000000000000002721317315703100170710ustar00rootroot00000000000000#ifndef _PG_FILEDUMP_DECODE_H_ #define _PG_FILEDUMP_DECODE_H_ int ParseAttributeTypesString(const char* str); void FormatDecode(const char* tupleData, unsigned int tupleSize); #endif pg_filedump-REL_10_0-c0e4028/pg_filedump.c000066400000000000000000001524731317315703100201470ustar00rootroot00000000000000/* * pg_filedump.c - PostgreSQL file dump utility for dumping and * formatting heap (data), index and control files. * * Copyright (c) 2002-2010 Red Hat, Inc. * Copyright (c) 2011-2017, PostgreSQL Global Development Group * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Original Author: Patrick Macdonald */ #include "pg_filedump.h" #include /* checksum_impl.h uses Assert, which doesn't work outside the server */ #undef Assert #define Assert(X) #include "storage/checksum.h" #include "storage/checksum_impl.h" #include "decode.h" /*** * Global variables for ease of use mostly */ /* File to dump or format */ static FILE *fp = NULL; /* File name for display */ static char *fileName = NULL; /* Cache for current block */ static char *buffer = NULL; /* Current block size */ static unsigned int blockSize = 0; /* Current block in file */ static unsigned int currentBlock = 0; /* Segment size in bytes */ static unsigned int segmentSize = RELSEG_SIZE * BLCKSZ; /* Number of current segment */ static unsigned int segmentNumber = 0; /* Offset of current block */ static unsigned int pageOffset = 0; /* Number of bytes to format */ static unsigned int bytesToFormat = 0; /* Block version number */ static unsigned int blockVersion = 0; /* Program exit code */ static int exitCode = 0; /*** * Function Prototypes */ static void DisplayOptions(unsigned int validOptions); static unsigned int ConsumeOptions(int numOptions, char **options); static int GetOptionValue(char *optionString); static void FormatBlock(BlockNumber blkno); static unsigned int GetBlockSize(); static unsigned int GetSpecialSectionType(Page page); static bool IsBtreeMetaPage(Page page); static void CreateDumpFileHeader(int numOptions, char **options); static int FormatHeader(Page page, BlockNumber blkno); static void FormatItemBlock(Page 
page); static void FormatItem(unsigned int numBytes, unsigned int startIndex, unsigned int formatAs); static void FormatSpecial(); static void FormatControl(); static void FormatBinary(unsigned int numBytes, unsigned int startIndex); static void DumpBinaryBlock(); static void DumpFileContents(); /* Send properly formed usage information to the user. */ static void DisplayOptions(unsigned int validOptions) { if (validOptions == OPT_RC_COPYRIGHT) printf ("\nVersion %s (for %s)" "\nCopyright (c) 2002-2010 Red Hat, Inc." "\nCopyright (c) 2011-2017, PostgreSQL Global Development Group\n", FD_VERSION, FD_PG_VERSION); printf ("\nUsage: pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file\n\n" "Display formatted contents of a PostgreSQL heap/index/control file\n" "Defaults are: relative addressing, range of the entire file, block\n" " size as listed on block 0 in the file\n\n" "The following options are valid for heap and index files:\n" " -a Display absolute addresses when formatting (Block header\n" " information is always block relative)\n" " -b Display binary block images within a range (Option will turn\n" " off all formatting options)\n" " -d Display formatted block content dump (Option will turn off\n" " all other formatting options)\n" " -D Decode tuples using given comma separated list of types\n" " Supported types:\n" " bigint bigserial bool char charN date float float4 float8 int\n" " json macaddr name oid real serial smallint smallserial text\n" " time timestamp timetz uuid varchar varcharN xid xml\n" " ~ ignores all attributes left in a tuple\n" " -f Display formatted block content dump along with interpretation\n" " -h Display this information\n" " -i Display interpreted item details\n" " -k Verify block checksums\n" " -R Display specific block ranges within the file (Blocks are\n" " indexed from 0)\n" " [startblock]: block to start at\n" " [endblock]: block to end at\n" " A startblock without an 
/*
 * Determine segment number by segment file name. For instance, if file
 * name is /path/to/xxxx.7 procedure returns 7. Default return value is 0.
 *
 * The default is returned when the name does not end in a '.' followed by
 * decimal digits, and also when the digits do not fit in an unsigned int.
 */
static unsigned int
GetSegmentNumberFromFileName(const char* fileName)
{
	const unsigned int maxValue = ~0U;
	size_t digitsStart = strlen(fileName);
	unsigned int result = 0;
	const char* p;

	/*
	 * Walk backwards over the trailing run of digits. The cast keeps
	 * isdigit() defined for bytes above 0x7f on platforms where char is
	 * signed.
	 */
	while(digitsStart > 0 && isdigit((unsigned char)fileName[digitsStart - 1]))
		digitsStart--;

	/* The digits must be immediately preceded by a dot. */
	if(digitsStart == 0 || fileName[digitsStart - 1] != '.')
		return 0;

	/* Accumulate by hand so an overlong digit run is rejected, not undefined. */
	for(p = &fileName[digitsStart]; *p != '\0'; p++)
	{
		unsigned int digit = (unsigned int)(*p - '0');

		if(result > (maxValue - digit) / 10)
			return 0;

		result = result * 10 + digit;
	}

	return result;
}
*/
/*
 * ConsumeOptions
 *
 * numOptions/options are argc/argv as received by the program.  The last
 * argument is expected to be the file to dump; it is opened here and the
 * handle stored in the file-level "fp".  Option flags are accumulated in
 * blockOptions, itemOptions, controlOptions and segmentOptions.
 *
 * Returns OPT_RC_VALID on success, otherwise one of OPT_RC_INVALID,
 * OPT_RC_FILE, OPT_RC_DUPLICATE or OPT_RC_COPYRIGHT.  Every error path
 * prints a message and sets exitCode to 1.
 *
 * NOTE(review): SET_OPTION is defined outside this chunk.  The code relies
 * on it reporting a repeated switch by setting the locals "rc" (to
 * OPT_RC_DUPLICATE) and presumably "duplicateSwitch"; those local names
 * must therefore not be renamed.
 */
static unsigned int
ConsumeOptions(int numOptions, char **options)
{
	unsigned int rc = OPT_RC_VALID;
	unsigned int x;
	unsigned int optionStringLength;
	char	   *optionString;
	char		duplicateSwitch = 0x00;

	for (x = 1; x < numOptions; x++)
	{
		optionString = options[x];
		optionStringLength = strlen(optionString);

		/* Range is a special case where we have to consume the next 1 or 2 */
		/* parameters to mark the range start and end */
		if ((optionStringLength == 2) && (strcmp(optionString, "-R") == 0))
		{
			int			range = 0;

			SET_OPTION(blockOptions, BLOCK_RANGE, 'R');
			/* Only accept the range option once */
			if (rc == OPT_RC_DUPLICATE)
				break;

			/* Make sure there are options after the range identifier */
			if (x >= (numOptions - 2))
			{
				rc = OPT_RC_INVALID;
				printf("Error: Missing range start identifier.\n");
				exitCode = 1;
				break;
			}

			/* Advance to what should be the range start and validate it */
			optionString = options[++x];
			if ((range = GetOptionValue(optionString)) < 0)
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid range start identifier <%s>.\n",
					   optionString);
				exitCode = 1;
				break;
			}

			/* The default is to dump only one block */
			blockStart = blockEnd = (unsigned int) range;

			/* We have our range start marker, check if there is an end */
			/* marker on the option line.  Assume that the last option */
			/* is the file we are dumping, so check if there are options */
			/* between the range start marker and the file */
			if (x <= (numOptions - 3))
			{
				if ((range = GetOptionValue(options[x + 1])) >= 0)
				{
					/* End range must be => start range */
					if (blockStart <= range)
					{
						blockEnd = (unsigned int) range;
						x++;
					}
					else
					{
						rc = OPT_RC_INVALID;
						/* blockStart is unsigned, so print it with %u */
						printf("Error: Requested block range start <%u> is "
							   "greater than end <%d>.\n", blockStart, range);
						exitCode = 1;
						break;
					}
				}
			}
		}
		/* Check for the special case where the user forces a block size */
		/* instead of having the tool determine it.  This is useful if */
		/* the header of block 0 is corrupt and gives a garbage block size */
		else if ((optionStringLength == 2)
				 && (strcmp(optionString, "-S") == 0))
		{
			int			localBlockSize;

			SET_OPTION(blockOptions, BLOCK_FORCED, 'S');
			/* Only accept the forced size option once */
			if (rc == OPT_RC_DUPLICATE)
				break;

			/* The token immediately following -S is the block size */
			if (x >= (numOptions - 2))
			{
				rc = OPT_RC_INVALID;
				printf("Error: Missing block size identifier.\n");
				/* Was missing: every other error path flags a failure */
				exitCode = 1;
				break;
			}

			/* Next option encountered must be forced block size */
			optionString = options[++x];
			if ((localBlockSize = GetOptionValue(optionString)) > 0)
				blockSize = (unsigned int) localBlockSize;
			else
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid block size requested <%s>.\n",
					   optionString);
				exitCode = 1;
				break;
			}
		}
		/* Check for the special case where the user forces a segment size. */
		else if ((optionStringLength == 2)
				 && (strcmp(optionString, "-s") == 0))
		{
			int			localSegmentSize;

			SET_OPTION(segmentOptions, SEGMENT_SIZE_FORCED, 's');
			/* Only accept the forced size option once */
			if (rc == OPT_RC_DUPLICATE)
				break;

			/* The token immediately following -s is the segment size */
			if (x >= (numOptions - 2))
			{
				rc = OPT_RC_INVALID;
				printf("Error: Missing segment size identifier.\n");
				exitCode = 1;
				break;
			}

			/* Next option encountered must be forced segment size */
			optionString = options[++x];
			if ((localSegmentSize = GetOptionValue(optionString)) > 0)
				segmentSize = (unsigned int) localSegmentSize;
			else
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid segment size requested <%s>.\n",
					   optionString);
				exitCode = 1;
				break;
			}
		}
		/* Check for the special case where the user forces tuples decoding. */
		else if((optionStringLength == 2)
				&& (strcmp(optionString, "-D") == 0))
		{
			SET_OPTION(blockOptions, BLOCK_DECODE, 'D');
			/* Only accept the decode option once */
			if (rc == OPT_RC_DUPLICATE)
				break;

			/* The token immediately following -D is attribute types string */
			if (x >= (numOptions - 2))
			{
				rc = OPT_RC_INVALID;
				printf("Error: Missing attribute types string.\n");
				exitCode = 1;
				break;
			}

			/* Next option encountered must be attribute types string */
			optionString = options[++x];
			if(ParseAttributeTypesString(optionString) < 0)
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid attribute types string <%s>.\n",
					   optionString);
				exitCode = 1;
				break;
			}
		}
		/* Check for the special case where the user forces a segment number */
		/* instead of having the tool determine it by file name. */
		else if ((optionStringLength == 2)
				 && (strcmp(optionString, "-n") == 0))
		{
			int			localSegmentNumber;

			SET_OPTION(segmentOptions, SEGMENT_NUMBER_FORCED, 'n');
			/* Only accept the forced segment number option once */
			if (rc == OPT_RC_DUPLICATE)
				break;

			/* The token immediately following -n is the segment number */
			if (x >= (numOptions - 2))
			{
				rc = OPT_RC_INVALID;
				printf("Error: Missing segment number identifier.\n");
				exitCode = 1;
				break;
			}

			/*
			 * Next option encountered must be forced segment number.
			 * Segment 0 is a legitimate segment (e.g. to override a
			 * misleading ".N" file suffix), so accept it.
			 */
			optionString = options[++x];
			if ((localSegmentNumber = GetOptionValue(optionString)) >= 0)
				segmentNumber = (unsigned int) localSegmentNumber;
			else
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid segment number requested <%s>.\n",
					   optionString);
				exitCode = 1;
				break;
			}
		}
		/* The last option MUST be the file name */
		else if (x == (numOptions - 1))
		{
			/* Check to see if this looks like an option string before opening */
			if (optionString[0] != '-')
			{
				fp = fopen(optionString, "rb");
				if (fp)
				{
					fileName = options[x];
					/* A -n given earlier wins over the file name suffix */
					if(!(segmentOptions & SEGMENT_NUMBER_FORCED))
						segmentNumber = GetSegmentNumberFromFileName(fileName);
				}
				else
				{
					rc = OPT_RC_FILE;
					printf("Error: Could not open file <%s>.\n", optionString);
					exitCode = 1;
					break;
				}
			}
			else
			{
				/* Could be the case where the help flag is used without a */
				/* filename.  Otherwise, the last option isn't a file */
				if (strcmp(optionString, "-h") == 0)
					rc = OPT_RC_COPYRIGHT;
				else
				{
					rc = OPT_RC_FILE;
					printf("Error: Missing file name to dump.\n");
					exitCode = 1;
				}
				break;
			}
		}
		else
		{
			unsigned int y;

			/* Option strings must start with '-' and contain switches */
			if (optionString[0] != '-')
			{
				rc = OPT_RC_INVALID;
				printf("Error: Invalid option string <%s>.\n", optionString);
				exitCode = 1;
				break;
			}

			/* Iterate through the singular option string, throw out */
			/* garbage, duplicates and set flags to be used in formatting */
			for (y = 1; y < optionStringLength; y++)
			{
				switch (optionString[y])
				{
						/* Use absolute addressing */
					case 'a':
						SET_OPTION(blockOptions, BLOCK_ABSOLUTE, 'a');
						break;

						/* Dump the binary contents of the page */
					case 'b':
						SET_OPTION(blockOptions, BLOCK_BINARY, 'b');
						break;

						/* Dump the listed file as a control file */
					case 'c':
						SET_OPTION(controlOptions, CONTROL_DUMP, 'c');
						break;

						/* Do not interpret the data. Format to hex and ascii. */
					case 'd':
						SET_OPTION(blockOptions, BLOCK_NO_INTR, 'd');
						break;

						/* Format the contents of the block with */
						/* interpretation of the headers */
					case 'f':
						SET_OPTION(blockOptions, BLOCK_FORMAT, 'f');
						break;

						/* Display the usage screen */
					case 'h':
						rc = OPT_RC_COPYRIGHT;
						break;

						/* Format the items in detail */
					case 'i':
						SET_OPTION(itemOptions, ITEM_DETAIL, 'i');
						break;

						/* Verify block checksums */
					case 'k':
						SET_OPTION(blockOptions, BLOCK_CHECKSUMS, 'k');
						break;

						/* Interpret items as standard index values */
					case 'x':
						SET_OPTION(itemOptions, ITEM_INDEX, 'x');
						if (itemOptions & ITEM_HEAP)
						{
							rc = OPT_RC_INVALID;
							/* The message had lost its option names */
							printf("Error: Options <y> and <x> are "
								   "mutually exclusive.\n");
							exitCode = 1;
						}
						break;

						/* Interpret items as heap values */
					case 'y':
						SET_OPTION(itemOptions, ITEM_HEAP, 'y');
						if (itemOptions & ITEM_INDEX)
						{
							rc = OPT_RC_INVALID;
							printf("Error: Options <x> and <y> are "
								   "mutually exclusive.\n");
							exitCode = 1;
						}
						break;

					default:
						rc = OPT_RC_INVALID;
						printf("Error: Unknown option <%c>.\n", optionString[y]);
						exitCode = 1;
						break;
				}

				/* Stop scanning this switch cluster on the first problem */
				if (rc)
					break;
			}
		}
	}

	if (rc == OPT_RC_DUPLICATE)
	{
		printf("Error: Duplicate option listed <%c>.\n", duplicateSwitch);
		exitCode = 1;
	}

	/* If the user requested a control file dump, a pure binary */
	/* block dump or a non-interpreted formatted dump, mask off */
	/* all other block level options (with a few exceptions) */
	if (rc == OPT_RC_VALID)
	{
		/* The user has requested a control file dump, only -f and */
		/* -S are valid... turn off all other formatting */
		if (controlOptions & CONTROL_DUMP)
		{
			if ((blockOptions & ~(BLOCK_FORMAT | BLOCK_FORCED)) ||
				(itemOptions))
			{
				rc = OPT_RC_INVALID;
				/* The message had lost its option names */
				printf("Error: Invalid options used for Control File dump.\n"
					   " Only options <Sf> may be used with <c>.\n");
				exitCode = 1;
			}
			else
			{
				controlOptions |=
					(blockOptions & (BLOCK_FORMAT | BLOCK_FORCED));
				blockOptions = itemOptions = 0;
			}
		}
		/* The user has requested a binary block dump... only -R and */
		/* -S are honoured (the mask keeps BLOCK_RANGE and BLOCK_FORCED) */
		else if (blockOptions & BLOCK_BINARY)
		{
			blockOptions &= (BLOCK_BINARY | BLOCK_RANGE | BLOCK_FORCED);
			itemOptions = 0;
		}
		/* The user has requested a non-interpreted dump... only -a, */
		/* -R and -S are honoured */
		else if (blockOptions & BLOCK_NO_INTR)
		{
			blockOptions &=
				(BLOCK_NO_INTR | BLOCK_ABSOLUTE | BLOCK_RANGE | BLOCK_FORCED);
			itemOptions = 0;
		}
	}

	return (rc);
}

/*
 * Convert an option argument to a non-negative number if possible.
 *
 * optionString: NUL-terminated argument text; must not be NULL.
 *
 * Returns the decimal value when the string consists solely of one or more
 * ASCII digits and the value fits in an int.  Returns -1 for an empty
 * string, for any non-digit character (including a sign), or on overflow.
 */
static int
GetOptionValue(char *optionString)
{
	/* Largest int, computed locally so no extra header is required */
	const int	maxValue = (int) (~0U >> 1);
	const char *cursor = optionString;
	int			value = 0;

	/* An empty argument is not a number (atoi would have returned 0) */
	if (*cursor == '\0')
		return -1;

	for (; *cursor != '\0'; cursor++)
	{
		int			digit;

		/* isdigit() requires an unsigned char value */
		if (!isdigit((unsigned char) *cursor))
			return -1;

		digit = *cursor - '0';

		/* Reject values that would not fit rather than overflow */
		if (value > (maxValue - digit) / 10)
			return -1;
		value = value * 10 + digit;
	}

	return (value);
}

/* Read the page header off of block 0 to determine the block size */
/* used in this file. Can be overridden using the -S option.
The */ /* returned value is the block size of block 0 on disk */ static unsigned int GetBlockSize() { unsigned int pageHeaderSize = sizeof(PageHeaderData); unsigned int localSize = 0; int bytesRead = 0; char localCache[pageHeaderSize]; /* Read the first header off of block 0 to determine the block size */ bytesRead = fread(&localCache, 1, pageHeaderSize, fp); rewind(fp); if (bytesRead == pageHeaderSize) localSize = (unsigned int) PageGetPageSize(&localCache); else { printf("Error: Unable to read full page header from block 0.\n" " ===> Read %u bytes\n", bytesRead); exitCode = 1; } return (localSize); } /* Determine the contents of the special section on the block and */ /* return this enum value */ static unsigned int GetSpecialSectionType(Page page) { unsigned int rc; unsigned int specialOffset; unsigned int specialSize; unsigned int specialValue; PageHeader pageHeader = (PageHeader) page; /* If this is not a partial header, check the validity of the */ /* special section offset and contents */ if (bytesToFormat > sizeof(PageHeaderData)) { specialOffset = (unsigned int) pageHeader->pd_special; /* Check that the special offset can remain on the block or */ /* the partial block */ if ((specialOffset == 0) || (specialOffset > blockSize) || (specialOffset > bytesToFormat)) rc = SPEC_SECT_ERROR_BOUNDARY; else { /* we may need to examine last 2 bytes of page to identify index */ uint16 *ptype = (uint16 *) (buffer + blockSize - sizeof(uint16)); specialSize = blockSize - specialOffset; /* If there is a special section, use its size to guess its */ /* contents, checking the last 2 bytes of the page in cases */ /* that are ambiguous. Note we don't attempt to dereference */ /* the pointers without checking bytesToFormat == blockSize. */ if (specialSize == 0) rc = SPEC_SECT_NONE; else if (specialSize == MAXALIGN(sizeof(uint32))) { /* If MAXALIGN is 8, this could be either a sequence or */ /* SP-GiST or GIN. 
*/ if (bytesToFormat == blockSize) { specialValue = *((int *) (buffer + specialOffset)); if (specialValue == SEQUENCE_MAGIC) rc = SPEC_SECT_SEQUENCE; else if (specialSize == MAXALIGN(sizeof(SpGistPageOpaqueData)) && *ptype == SPGIST_PAGE_ID) rc = SPEC_SECT_INDEX_SPGIST; else if (specialSize == MAXALIGN(sizeof(GinPageOpaqueData))) rc = SPEC_SECT_INDEX_GIN; else rc = SPEC_SECT_ERROR_UNKNOWN; } else rc = SPEC_SECT_ERROR_UNKNOWN; } /* SP-GiST and GIN have same size special section, so check */ /* the page ID bytes first. */ else if (specialSize == MAXALIGN(sizeof(SpGistPageOpaqueData)) && bytesToFormat == blockSize && *ptype == SPGIST_PAGE_ID) rc = SPEC_SECT_INDEX_SPGIST; else if (specialSize == MAXALIGN(sizeof(GinPageOpaqueData))) rc = SPEC_SECT_INDEX_GIN; else if (specialSize > 2 && bytesToFormat == blockSize) { /* As of 8.3, BTree, Hash, and GIST all have the same size */ /* special section, but the last two bytes of the section */ /* can be checked to determine what's what. */ if (*ptype <= MAX_BT_CYCLE_ID && specialSize == MAXALIGN(sizeof(BTPageOpaqueData))) rc = SPEC_SECT_INDEX_BTREE; else if (*ptype == HASHO_PAGE_ID && specialSize == MAXALIGN(sizeof(HashPageOpaqueData))) rc = SPEC_SECT_INDEX_HASH; else if (*ptype == GIST_PAGE_ID && specialSize == MAXALIGN(sizeof(GISTPageOpaqueData))) rc = SPEC_SECT_INDEX_GIST; else rc = SPEC_SECT_ERROR_UNKNOWN; } else rc = SPEC_SECT_ERROR_UNKNOWN; } } else rc = SPEC_SECT_ERROR_UNKNOWN; return (rc); } /* Check whether page is a btree meta page */ static bool IsBtreeMetaPage(Page page) { PageHeader pageHeader = (PageHeader) page; if ((PageGetSpecialSize(page) == (MAXALIGN(sizeof(BTPageOpaqueData)))) && (bytesToFormat == blockSize)) { BTPageOpaque btpo = (BTPageOpaque) ((char *) page + pageHeader->pd_special); /* Must check the cycleid to be sure it's really btree. 
*/ if ((btpo->btpo_cycleid <= MAX_BT_CYCLE_ID) && (btpo->btpo_flags & BTP_META)) return true; } return false; } /* Display a header for the dump so we know the file name, the options */ /* used and the time the dump was taken */ static void CreateDumpFileHeader(int numOptions, char **options) { unsigned int x; char optionBuffer[52] = "\0"; time_t rightNow = time(NULL); /* Iterate through the options and cache them. */ /* The maximum we can display is 50 option characters + spaces. */ for (x = 1; x < (numOptions - 1); x++) { if ((strlen(optionBuffer) + strlen(options[x])) > 50) break; strcat(optionBuffer, options[x]); strcat(optionBuffer, " "); } printf ("\n*******************************************************************\n" "* PostgreSQL File/Block Formatted Dump Utility - Version %s\n" "*\n" "* File: %s\n" "* Options used: %s\n*\n" "* Dump created on: %s" "*******************************************************************\n", FD_VERSION, fileName, (strlen(optionBuffer)) ? optionBuffer : "None", ctime(&rightNow)); } /* Dump out a formatted block header for the requested block */ static int FormatHeader(Page page, BlockNumber blkno) { int rc = 0; unsigned int headerBytes; PageHeader pageHeader = (PageHeader) page; printf("
-----\n"); /* Only attempt to format the header if the entire header (minus the item */ /* array) is available */ if (bytesToFormat < offsetof(PageHeaderData, pd_linp[0])) { headerBytes = bytesToFormat; rc = EOF_ENCOUNTERED; } else { XLogRecPtr pageLSN = PageGetLSN(page); int maxOffset = PageGetMaxOffsetNumber(page); char flagString[100]; headerBytes = offsetof(PageHeaderData, pd_linp[0]); blockVersion = (unsigned int) PageGetPageLayoutVersion(page); /* The full header exists but we have to check that the item array */ /* is available or how far we can index into it */ if (maxOffset > 0) { unsigned int itemsLength = maxOffset * sizeof(ItemIdData); if (bytesToFormat < (headerBytes + itemsLength)) { headerBytes = bytesToFormat; rc = EOF_ENCOUNTERED; } else headerBytes += itemsLength; } flagString[0] = '\0'; if (pageHeader->pd_flags & PD_HAS_FREE_LINES) strcat(flagString, "HAS_FREE_LINES|"); if (pageHeader->pd_flags & PD_PAGE_FULL) strcat(flagString, "PAGE_FULL|"); if (pageHeader->pd_flags & PD_ALL_VISIBLE) strcat(flagString, "ALL_VISIBLE|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; /* Interpret the content of the header */ printf (" Block Offset: 0x%08x Offsets: Lower %4u (0x%04hx)\n" " Block: Size %4d Version %4u Upper %4u (0x%04hx)\n" " LSN: logid %6d recoff 0x%08x Special %4u (0x%04hx)\n" " Items: %4d Free Space: %4u\n" " Checksum: 0x%04x Prune XID: 0x%08x Flags: 0x%04x (%s)\n" " Length (including item array): %u\n\n", pageOffset, pageHeader->pd_lower, pageHeader->pd_lower, (int) PageGetPageSize(page), blockVersion, pageHeader->pd_upper, pageHeader->pd_upper, (uint32) (pageLSN >> 32), (uint32) pageLSN, pageHeader->pd_special, pageHeader->pd_special, maxOffset, pageHeader->pd_upper - pageHeader->pd_lower, pageHeader->pd_checksum, pageHeader->pd_prune_xid, pageHeader->pd_flags, flagString, headerBytes); /* If it's a btree meta page, print the contents of the meta block. 
*/ if (IsBtreeMetaPage(page)) { BTMetaPageData *btpMeta = BTPageGetMeta(buffer); printf(" BTree Meta Data: Magic (0x%08x) Version (%u)\n" " Root: Block (%u) Level (%u)\n" " FastRoot: Block (%u) Level (%u)\n\n", btpMeta->btm_magic, btpMeta->btm_version, btpMeta->btm_root, btpMeta->btm_level, btpMeta->btm_fastroot, btpMeta->btm_fastlevel); headerBytes += sizeof(BTMetaPageData); } /* Eye the contents of the header and alert the user to possible */ /* problems. */ if ((maxOffset < 0) || (maxOffset > blockSize) || (blockVersion != PG_PAGE_LAYOUT_VERSION) || /* only one we support */ (pageHeader->pd_upper > blockSize) || (pageHeader->pd_upper > pageHeader->pd_special) || (pageHeader->pd_lower < (sizeof(PageHeaderData) - sizeof(ItemIdData))) || (pageHeader->pd_lower > blockSize) || (pageHeader->pd_upper < pageHeader->pd_lower) || (pageHeader->pd_special > blockSize)) { printf(" Error: Invalid header information.\n\n"); exitCode = 1; } if (blockOptions & BLOCK_CHECKSUMS) { uint32 delta = (segmentSize/blockSize)*segmentNumber; uint16 calc_checksum = pg_checksum_page(page, delta + blkno); if (calc_checksum != pageHeader->pd_checksum) { printf(" Error: checksum failure: calculated 0x%04x.\n\n", calc_checksum); exitCode = 1; } } } /* If we have reached the end of file while interpreting the header, let */ /* the user know about it */ if (rc == EOF_ENCOUNTERED) { printf (" Error: End of block encountered within the header." " Bytes read: %4u.\n\n", bytesToFormat); exitCode = 1; } /* A request to dump the formatted binary of the block (header, */ /* items and special section). It's best to dump even on an error */ /* so the user can see the raw image. 
*/ if (blockOptions & BLOCK_FORMAT) FormatBinary(headerBytes, 0); return (rc); } /* Dump out formatted items that reside on this block */ static void FormatItemBlock(Page page) { unsigned int x; unsigned int itemSize; unsigned int itemOffset; unsigned int itemFlags; ItemId itemId; int maxOffset = PageGetMaxOffsetNumber(page); /* If it's a btree meta page, the meta block is where items would normally */ /* be; don't print garbage. */ if (IsBtreeMetaPage(page)) return; printf(" ------ \n"); /* Loop through the items on the block. Check if the block is */ /* empty and has a sensible item array listed before running */ /* through each item */ if (maxOffset == 0) printf(" Empty block - no items listed \n\n"); else if ((maxOffset < 0) || (maxOffset > blockSize)) { printf(" Error: Item index corrupt on block. Offset: <%d>.\n\n", maxOffset); exitCode = 1; } else { int formatAs; char textFlags[16]; /* First, honour requests to format items a special way, then */ /* use the special section to determine the format style */ if (itemOptions & ITEM_INDEX) formatAs = ITEM_INDEX; else if (itemOptions & ITEM_HEAP) formatAs = ITEM_HEAP; else switch (specialType) { case SPEC_SECT_INDEX_BTREE: case SPEC_SECT_INDEX_HASH: case SPEC_SECT_INDEX_GIST: case SPEC_SECT_INDEX_GIN: formatAs = ITEM_INDEX; break; case SPEC_SECT_INDEX_SPGIST: { SpGistPageOpaque spgpo = (SpGistPageOpaque) ((char *) page + ((PageHeader) page)->pd_special); if (spgpo->flags & SPGIST_LEAF) formatAs = ITEM_SPG_LEAF; else formatAs = ITEM_SPG_INNER; } break; default: formatAs = ITEM_HEAP; break; } for (x = 1; x < (maxOffset + 1); x++) { itemId = PageGetItemId(page, x); itemFlags = (unsigned int) ItemIdGetFlags(itemId); itemSize = (unsigned int) ItemIdGetLength(itemId); itemOffset = (unsigned int) ItemIdGetOffset(itemId); switch (itemFlags) { case LP_UNUSED: strcpy(textFlags, "UNUSED"); break; case LP_NORMAL: strcpy(textFlags, "NORMAL"); break; case LP_REDIRECT: strcpy(textFlags, "REDIRECT"); break; case LP_DEAD: 
strcpy(textFlags, "DEAD"); break; default: /* shouldn't be possible */ sprintf(textFlags, "0x%02x", itemFlags); break; } printf(" Item %3u -- Length: %4u Offset: %4u (0x%04x)" " Flags: %s\n", x, itemSize, itemOffset, itemOffset, textFlags); /* Make sure the item can physically fit on this block before */ /* formatting */ if ((itemOffset + itemSize > blockSize) || (itemOffset + itemSize > bytesToFormat)) { printf(" Error: Item contents extend beyond block.\n" " BlockSize<%d> Bytes Read<%d> Item Start<%d>.\n", blockSize, bytesToFormat, itemOffset + itemSize); exitCode = 1; } else { /* If the user requests that the items be interpreted as */ /* heap or index items... */ if (itemOptions & ITEM_DETAIL) FormatItem(itemSize, itemOffset, formatAs); /* Dump the items contents in hex and ascii */ if (blockOptions & BLOCK_FORMAT) FormatBinary(itemSize, itemOffset); /* Decode tuple data */ if((blockOptions & BLOCK_DECODE) && (itemFlags == LP_NORMAL)) FormatDecode(&buffer[itemOffset], itemSize); if (x == maxOffset) printf("\n"); } } } } /* Interpret the contents of the item based on whether it has a special */ /* section and/or the user has hinted */ static void FormatItem(unsigned int numBytes, unsigned int startIndex, unsigned int formatAs) { static const char *const spgist_tupstates[4] = { "LIVE", "REDIRECT", "DEAD", "PLACEHOLDER" }; if (formatAs == ITEM_INDEX) { /* It is an IndexTuple item, so dump the index header */ if (numBytes < sizeof(ItemPointerData)) { if (numBytes) { printf(" Error: This item does not look like an index item.\n"); exitCode = 1; } } else { IndexTuple itup = (IndexTuple) (&(buffer[startIndex])); printf(" Block Id: %u linp Index: %u Size: %d\n" " Has Nulls: %u Has Varwidths: %u\n\n", ((uint32) ((itup->t_tid.ip_blkid.bi_hi << 16) | (uint16) itup->t_tid.ip_blkid.bi_lo)), itup->t_tid.ip_posid, (int) IndexTupleSize(itup), IndexTupleHasNulls(itup) ? 1 : 0, IndexTupleHasVarwidths(itup) ? 
1 : 0); if (numBytes != IndexTupleSize(itup)) { printf(" Error: Item size difference. Given <%u>, " "Internal <%d>.\n", numBytes, (int) IndexTupleSize(itup)); exitCode = 1; } } } else if (formatAs == ITEM_SPG_INNER) { /* It is an SpGistInnerTuple item, so dump the index header */ if (numBytes < SGITHDRSZ) { if (numBytes) { printf(" Error: This item does not look like an SPGiST item.\n"); exitCode = 1; } } else { SpGistInnerTuple itup = (SpGistInnerTuple) (&(buffer[startIndex])); printf(" State: %s allTheSame: %d nNodes: %u prefixSize: %u\n\n", spgist_tupstates[itup->tupstate], itup->allTheSame, itup->nNodes, itup->prefixSize); if (numBytes != itup->size) { printf(" Error: Item size difference. Given <%u>, " "Internal <%d>.\n", numBytes, (int) itup->size); exitCode = 1; } else if (itup->prefixSize == MAXALIGN(itup->prefixSize)) { int i; SpGistNodeTuple node; /* Dump the prefix contents in hex and ascii */ if ((blockOptions & BLOCK_FORMAT) && SGITHDRSZ + itup->prefixSize <= numBytes) FormatBinary(SGITHDRSZ + itup->prefixSize, startIndex); /* Try to print the nodes, but only while pointer is sane */ SGITITERATE(itup, i, node) { int off = (char *) node - (char *) itup; if (off + SGNTHDRSZ > numBytes) break; printf(" Node %2u: Downlink: %u/%u Size: %d Null: %u\n", i, ((uint32) ((node->t_tid.ip_blkid.bi_hi << 16) | (uint16) node->t_tid.ip_blkid.bi_lo)), node->t_tid.ip_posid, (int) IndexTupleSize(node), IndexTupleHasNulls(node) ? 
1 : 0); /* Dump the node's contents in hex and ascii */ if ((blockOptions & BLOCK_FORMAT) && off + IndexTupleSize(node) <= numBytes) FormatBinary(IndexTupleSize(node), startIndex + off); if (IndexTupleSize(node) != MAXALIGN(IndexTupleSize(node))) break; } } printf("\n"); } } else if (formatAs == ITEM_SPG_LEAF) { /* It is an SpGistLeafTuple item, so dump the index header */ if (numBytes < SGLTHDRSZ) { if (numBytes) { printf(" Error: This item does not look like an SPGiST item.\n"); exitCode = 1; } } else { SpGistLeafTuple itup = (SpGistLeafTuple) (&(buffer[startIndex])); printf(" State: %s nextOffset: %u Block Id: %u linp Index: %u\n\n", spgist_tupstates[itup->tupstate], itup->nextOffset, ((uint32) ((itup->heapPtr.ip_blkid.bi_hi << 16) | (uint16) itup->heapPtr.ip_blkid.bi_lo)), itup->heapPtr.ip_posid); if (numBytes != itup->size) { printf(" Error: Item size difference. Given <%u>, " "Internal <%d>.\n", numBytes, (int) itup->size); exitCode = 1; } } } else { /* It is a HeapTuple item, so dump the heap header */ int alignedSize = MAXALIGN(sizeof(HeapTupleHeaderData)); if (numBytes < alignedSize) { if (numBytes) { printf(" Error: This item does not look like a heap item.\n"); exitCode = 1; } } else { char flagString[256]; unsigned int x; unsigned int bitmapLength = 0; unsigned int oidLength = 0; unsigned int computedLength; unsigned int infoMask; unsigned int infoMask2; int localNatts; unsigned int localHoff; bits8 *localBits; unsigned int localBitOffset; HeapTupleHeader htup = (HeapTupleHeader) (&buffer[startIndex]); infoMask = htup->t_infomask; infoMask2 = htup->t_infomask2; localBits = &(htup->t_bits[0]); localNatts = HeapTupleHeaderGetNatts(htup); localHoff = htup->t_hoff; localBitOffset = offsetof(HeapTupleHeaderData, t_bits); printf(" XMIN: %u XMAX: %u CID|XVAC: %u", HeapTupleHeaderGetXmin(htup), HeapTupleHeaderGetRawXmax(htup), HeapTupleHeaderGetRawCommandId(htup)); if (infoMask & HEAP_HASOID) printf(" OID: %u", HeapTupleHeaderGetOid(htup)); printf("\n" " Block 
Id: %u linp Index: %u Attributes: %d Size: %d\n", ((uint32) ((htup->t_ctid.ip_blkid.bi_hi << 16) | (uint16) htup-> t_ctid.ip_blkid.bi_lo)), htup->t_ctid.ip_posid, localNatts, htup->t_hoff); /* Place readable versions of the tuple info mask into a buffer. */ /* Assume that the string can not expand beyond 256. */ flagString[0] = '\0'; if (infoMask & HEAP_HASNULL) strcat(flagString, "HASNULL|"); if (infoMask & HEAP_HASVARWIDTH) strcat(flagString, "HASVARWIDTH|"); if (infoMask & HEAP_HASEXTERNAL) strcat(flagString, "HASEXTERNAL|"); if (infoMask & HEAP_HASOID) strcat(flagString, "HASOID|"); if (infoMask & HEAP_XMAX_KEYSHR_LOCK) strcat(flagString, "XMAX_KEYSHR_LOCK|"); if (infoMask & HEAP_COMBOCID) strcat(flagString, "COMBOCID|"); if (infoMask & HEAP_XMAX_EXCL_LOCK) strcat(flagString, "XMAX_EXCL_LOCK|"); if (infoMask & HEAP_XMAX_LOCK_ONLY) strcat(flagString, "XMAX_LOCK_ONLY|"); if (infoMask & HEAP_XMIN_COMMITTED) strcat(flagString, "XMIN_COMMITTED|"); if (infoMask & HEAP_XMIN_INVALID) strcat(flagString, "XMIN_INVALID|"); if (infoMask & HEAP_XMAX_COMMITTED) strcat(flagString, "XMAX_COMMITTED|"); if (infoMask & HEAP_XMAX_INVALID) strcat(flagString, "XMAX_INVALID|"); if (infoMask & HEAP_XMAX_IS_MULTI) strcat(flagString, "XMAX_IS_MULTI|"); if (infoMask & HEAP_UPDATED) strcat(flagString, "UPDATED|"); if (infoMask & HEAP_MOVED_OFF) strcat(flagString, "MOVED_OFF|"); if (infoMask & HEAP_MOVED_IN) strcat(flagString, "MOVED_IN|"); if (infoMask2 & HEAP_KEYS_UPDATED) strcat(flagString, "KEYS_UPDATED|"); if (infoMask2 & HEAP_HOT_UPDATED) strcat(flagString, "HOT_UPDATED|"); if (infoMask2 & HEAP_ONLY_TUPLE) strcat(flagString, "HEAP_ONLY|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" infomask: 0x%04x (%s) \n", infoMask, flagString); /* As t_bits is a variable length array, determine the length of */ /* the header proper */ if (infoMask & HEAP_HASNULL) bitmapLength = BITMAPLEN(localNatts); else bitmapLength = 0; if (infoMask & HEAP_HASOID) oidLength += 
sizeof(Oid); computedLength = MAXALIGN(localBitOffset + bitmapLength + oidLength); /* * Inform the user of a header size mismatch or dump the t_bits * array */ if (computedLength != localHoff) { printf (" Error: Computed header length not equal to header size.\n" " Computed <%u> Header: <%d>\n", computedLength, localHoff); exitCode = 1; } else if ((infoMask & HEAP_HASNULL) && bitmapLength) { printf(" t_bits: "); for (x = 0; x < bitmapLength; x++) { printf("[%u]: 0x%02x ", x, localBits[x]); if (((x & 0x03) == 0x03) && (x < bitmapLength - 1)) printf("\n "); } printf("\n"); } printf("\n"); } } } /* On blocks that have special sections, print the contents */ /* according to previously determined special section type */ static void FormatSpecial() { PageHeader pageHeader = (PageHeader) buffer; char flagString[100] = "\0"; unsigned int specialOffset = pageHeader->pd_special; unsigned int specialSize = (blockSize >= specialOffset) ? (blockSize - specialOffset) : 0; printf(" -----\n"); switch (specialType) { case SPEC_SECT_ERROR_UNKNOWN: case SPEC_SECT_ERROR_BOUNDARY: printf(" Error: Invalid special section encountered.\n"); exitCode = 1; break; case SPEC_SECT_SEQUENCE: printf(" Sequence: 0x%08x\n", SEQUENCE_MAGIC); break; /* Btree index section */ case SPEC_SECT_INDEX_BTREE: { BTPageOpaque btreeSection = (BTPageOpaque) (buffer + specialOffset); if (btreeSection->btpo_flags & BTP_LEAF) strcat(flagString, "LEAF|"); if (btreeSection->btpo_flags & BTP_ROOT) strcat(flagString, "ROOT|"); if (btreeSection->btpo_flags & BTP_DELETED) strcat(flagString, "DELETED|"); if (btreeSection->btpo_flags & BTP_META) strcat(flagString, "META|"); if (btreeSection->btpo_flags & BTP_HALF_DEAD) strcat(flagString, "HALFDEAD|"); if (btreeSection->btpo_flags & BTP_SPLIT_END) strcat(flagString, "SPLITEND|"); if (btreeSection->btpo_flags & BTP_HAS_GARBAGE) strcat(flagString, "HASGARBAGE|"); if (btreeSection->btpo_flags & BTP_INCOMPLETE_SPLIT) strcat(flagString, "INCOMPLETESPLIT|"); if 
(strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" BTree Index Section:\n" " Flags: 0x%04x (%s)\n" " Blocks: Previous (%d) Next (%d) %s (%d) CycleId (%d)\n\n", btreeSection->btpo_flags, flagString, btreeSection->btpo_prev, btreeSection->btpo_next, (btreeSection-> btpo_flags & BTP_DELETED) ? "Next XID" : "Level", btreeSection->btpo.level, btreeSection->btpo_cycleid); } break; /* Hash index section */ case SPEC_SECT_INDEX_HASH: { HashPageOpaque hashSection = (HashPageOpaque) (buffer + specialOffset); if (hashSection->hasho_flag & LH_UNUSED_PAGE) strcat(flagString, "UNUSED|"); if (hashSection->hasho_flag & LH_OVERFLOW_PAGE) strcat(flagString, "OVERFLOW|"); if (hashSection->hasho_flag & LH_BUCKET_PAGE) strcat(flagString, "BUCKET|"); if (hashSection->hasho_flag & LH_BITMAP_PAGE) strcat(flagString, "BITMAP|"); if (hashSection->hasho_flag & LH_META_PAGE) strcat(flagString, "META|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" Hash Index Section:\n" " Flags: 0x%04x (%s)\n" " Bucket Number: 0x%04x\n" " Blocks: Previous (%d) Next (%d)\n\n", hashSection->hasho_flag, flagString, hashSection->hasho_bucket, hashSection->hasho_prevblkno, hashSection->hasho_nextblkno); } break; /* GIST index section */ case SPEC_SECT_INDEX_GIST: { GISTPageOpaque gistSection = (GISTPageOpaque) (buffer + specialOffset); if (gistSection->flags & F_LEAF) strcat(flagString, "LEAF|"); if (gistSection->flags & F_DELETED) strcat(flagString, "DELETED|"); if (gistSection->flags & F_TUPLES_DELETED) strcat(flagString, "TUPLES_DELETED|"); if (gistSection->flags & F_FOLLOW_RIGHT) strcat(flagString, "FOLLOW_RIGHT|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" GIST Index Section:\n" " NSN: 0x%08x/0x%08x\n" " RightLink: %d\n" " Flags: 0x%08x (%s)\n\n", gistSection->nsn.xlogid, gistSection->nsn.xrecoff, gistSection->rightlink, gistSection->flags, flagString); } break; /* GIN index section */ case SPEC_SECT_INDEX_GIN: { 
GinPageOpaque ginSection = (GinPageOpaque) (buffer + specialOffset); if (ginSection->flags & GIN_DATA) strcat(flagString, "DATA|"); if (ginSection->flags & GIN_LEAF) strcat(flagString, "LEAF|"); if (ginSection->flags & GIN_DELETED) strcat(flagString, "DELETED|"); if (ginSection->flags & GIN_META) strcat(flagString, "META|"); if (ginSection->flags & GIN_LIST) strcat(flagString, "LIST|"); if (ginSection->flags & GIN_LIST_FULLROW) strcat(flagString, "FULLROW|"); if (ginSection->flags & GIN_INCOMPLETE_SPLIT) strcat(flagString, "INCOMPLETESPLIT|"); if (ginSection->flags & GIN_COMPRESSED) strcat(flagString, "COMPRESSED|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" GIN Index Section:\n" " Flags: 0x%08x (%s) Maxoff: %d\n" " Blocks: RightLink (%d)\n\n", ginSection->flags, flagString, ginSection->maxoff, ginSection->rightlink); } break; /* SP-GIST index section */ case SPEC_SECT_INDEX_SPGIST: { SpGistPageOpaque spgistSection = (SpGistPageOpaque) (buffer + specialOffset); if (spgistSection->flags & SPGIST_META) strcat(flagString, "META|"); if (spgistSection->flags & SPGIST_DELETED) strcat(flagString, "DELETED|"); if (spgistSection->flags & SPGIST_LEAF) strcat(flagString, "LEAF|"); if (spgistSection->flags & SPGIST_NULLS) strcat(flagString, "NULLS|"); if (strlen(flagString)) flagString[strlen(flagString) - 1] = '\0'; printf(" SPGIST Index Section:\n" " Flags: 0x%08x (%s)\n" " nRedirection: %d\n" " nPlaceholder: %d\n\n", spgistSection->flags, flagString, spgistSection->nRedirection, spgistSection->nPlaceholder); } break; /* No idea what type of special section this is */ default: printf(" Unknown special section type. Type: <%u>.\n", specialType); exitCode = 1; break; } /* Dump the formatted contents of the special section */ if (blockOptions & BLOCK_FORMAT) { if (specialType == SPEC_SECT_ERROR_BOUNDARY) { printf(" Error: Special section points off page." 
" Unable to dump contents.\n"); exitCode = 1; } else FormatBinary(specialSize, specialOffset); } } /* For each block, dump out formatted header and content information */ static void FormatBlock(BlockNumber blkno) { Page page = (Page) buffer; pageOffset = blockSize * currentBlock; specialType = GetSpecialSectionType(page); printf("\nBlock %4u **%s***************************************\n", currentBlock, (bytesToFormat == blockSize) ? "***************" : " PARTIAL BLOCK "); /* Either dump out the entire block in hex+acsii fashion or */ /* interpret the data based on block structure */ if (blockOptions & BLOCK_NO_INTR) FormatBinary(bytesToFormat, 0); else { int rc; /* Every block contains a header, items and possibly a special */ /* section. Beware of partial block reads though */ rc = FormatHeader(page, blkno); /* If we didn't encounter a partial read in the header, carry on... */ if (rc != EOF_ENCOUNTERED) { FormatItemBlock(page); if (specialType != SPEC_SECT_NONE) FormatSpecial(); } } } /* Dump out the content of the PG control file */ static void FormatControl() { unsigned int localPgVersion = 0; unsigned int controlFileSize = 0; time_t cd_time; time_t cp_time; printf ("\n *********************************************\n\n"); /* Check the version */ if (bytesToFormat >= offsetof(ControlFileData, catalog_version_no)) localPgVersion = ((ControlFileData *) buffer)->pg_control_version; if (localPgVersion >= 72) controlFileSize = sizeof(ControlFileData); else { printf("pg_filedump: pg_control version %u not supported.\n", localPgVersion); return; } /* Interpret the control file if it's all there */ if (bytesToFormat >= controlFileSize) { ControlFileData *controlData = (ControlFileData *) buffer; CheckPoint *checkPoint = &(controlData->checkPointCopy); pg_crc32 crcLocal; char *dbState; /* Compute a local copy of the CRC to verify the one on disk */ INIT_CRC32C(crcLocal); COMP_CRC32C(crcLocal, buffer, offsetof(ControlFileData, crc)); FIN_CRC32C(crcLocal); /* Grab a 
readable version of the database state */ switch (controlData->state) { case DB_STARTUP: dbState = "STARTUP"; break; case DB_SHUTDOWNED: dbState = "SHUTDOWNED"; break; case DB_SHUTDOWNED_IN_RECOVERY: dbState = "SHUTDOWNED_IN_RECOVERY"; break; case DB_SHUTDOWNING: dbState = "SHUTDOWNING"; break; case DB_IN_CRASH_RECOVERY: dbState = "IN CRASH RECOVERY"; break; case DB_IN_ARCHIVE_RECOVERY: dbState = "IN ARCHIVE RECOVERY"; break; case DB_IN_PRODUCTION: dbState = "IN PRODUCTION"; break; default: dbState = "UNKNOWN"; break; } /* convert timestamps to system's time_t width */ cd_time = controlData->time; cp_time = checkPoint->time; printf(" CRC: %s\n" " pg_control Version: %u%s\n" " Catalog Version: %u\n" " System Identifier: " UINT64_FORMAT "\n" " State: %s\n" " Last Mod Time: %s" " Last Checkpoint Record: Log File (%u) Offset (0x%08x)\n" " Previous Checkpoint Record: Log File (%u) Offset (0x%08x)\n" " Last Checkpoint Record Redo: Log File (%u) Offset (0x%08x)\n" " |- TimeLineID: %u\n" " |- Next XID: %u/%u\n" " |- Next OID: %u\n" " |- Next Multi: %u\n" " |- Next MultiOff: %u\n" " |- Time: %s" " Minimum Recovery Point: Log File (%u) Offset (0x%08x)\n" " Maximum Data Alignment: %u\n" " Floating-Point Sample: %.7g%s\n" " Database Block Size: %u\n" " Blocks Per Segment: %u\n" " XLOG Block Size: %u\n" " XLOG Segment Size: %u\n" " Maximum Identifier Length: %u\n" " Maximum Index Keys: %u\n" " TOAST Chunk Size: %u\n\n", EQ_CRC32C(crcLocal, controlData->crc) ? "Correct" : "Not Correct", controlData->pg_control_version, (controlData->pg_control_version == PG_CONTROL_VERSION ? 
"" : " (Not Correct!)"), controlData->catalog_version_no, controlData->system_identifier, dbState, ctime(&(cd_time)), (uint32) (controlData->checkPoint >> 32), (uint32) controlData->checkPoint, (uint32) (controlData->prevCheckPoint >> 32), (uint32) controlData->prevCheckPoint, (uint32) (checkPoint->redo >> 32), (uint32) checkPoint->redo, checkPoint->ThisTimeLineID, checkPoint->nextXidEpoch, checkPoint->nextXid, checkPoint->nextOid, checkPoint->nextMulti, checkPoint->nextMultiOffset, ctime(&cp_time), (uint32) (controlData->minRecoveryPoint >> 32), (uint32) controlData->minRecoveryPoint, controlData->maxAlign, controlData->floatFormat, (controlData->floatFormat == FLOATFORMAT_VALUE ? "" : " (Not Correct!)"), controlData->blcksz, controlData->relseg_size, controlData->xlog_blcksz, controlData->xlog_seg_size, controlData->nameDataLen, controlData->indexMaxKeys, controlData->toast_max_chunk_size); } else { printf(" Error: pg_control file size incorrect.\n" " Size: Correct <%u> Received <%u>.\n\n", controlFileSize, bytesToFormat); /* If we have an error, force a formatted dump so we can see */ /* where things are going wrong */ controlOptions |= CONTROL_FORMAT; exitCode = 1; } /* Dump hex and ascii representation of data */ if (controlOptions & CONTROL_FORMAT) { printf(" *****************" "**********************\n\n"); FormatBinary(bytesToFormat, 0); } } /* Dump out the contents of the block in hex and ascii. */ /* BYTES_PER_LINE bytes are formatted in each line. 
*/ static void FormatBinary(unsigned int numBytes, unsigned int startIndex) { unsigned int index = 0; unsigned int stopIndex = 0; unsigned int x = 0; unsigned int lastByte = startIndex + numBytes; if (numBytes) { /* Iterate through a printable row detailing the current */ /* address, the hex and ascii values */ for (index = startIndex; index < lastByte; index += BYTES_PER_LINE) { stopIndex = index + BYTES_PER_LINE; /* Print out the address */ if (blockOptions & BLOCK_ABSOLUTE) printf(" %08x: ", (unsigned int) (pageOffset + index)); else printf(" %04x: ", (unsigned int) index); /* Print out the hex version of the data */ for (x = index; x < stopIndex; x++) { if (x < lastByte) printf("%02x", 0xff & ((unsigned) buffer[x])); else printf(" "); if ((x & 0x03) == 0x03) printf(" "); } printf(" "); /* Print out the ascii version of the data */ for (x = index; x < stopIndex; x++) { if (x < lastByte) printf("%c", isprint(buffer[x]) ? buffer[x] : '.'); else printf(" "); } printf("\n"); } printf("\n"); } } /* Dump the binary image of the block */ static void DumpBinaryBlock() { unsigned int x; for (x = 0; x < bytesToFormat; x++) putchar(buffer[x]); } /* Control the dumping of the blocks within the file */ static void DumpFileContents() { unsigned int initialRead = 1; unsigned int contentsToDump = 1; /* If the user requested a block range, seek to the correct position */ /* within the file for the start block. */ if (blockOptions & BLOCK_RANGE) { unsigned int position = blockSize * blockStart; if (fseek(fp, position, SEEK_SET) != 0) { printf("Error: Seek error encountered before requested " "start block <%d>.\n", blockStart); contentsToDump = 0; exitCode = 1; } else currentBlock = blockStart; } /* Iterate through the blocks in the file until you reach the end or */ /* the requested range end */ while (contentsToDump) { bytesToFormat = fread(buffer, 1, blockSize, fp); if (bytesToFormat == 0) { /* fseek() won't pop an error if you seek passed eof. 
The next */ /* subsequent read gets the error. */ if (initialRead) printf("Error: Premature end of file encountered.\n"); else if (!(blockOptions & BLOCK_BINARY)) printf("\n*** End of File Encountered. Last Block " "Read: %d ***\n", currentBlock - 1); contentsToDump = 0; } else { if (blockOptions & BLOCK_BINARY) DumpBinaryBlock(); else { if (controlOptions & CONTROL_DUMP) { FormatControl(); contentsToDump = false; } else FormatBlock(currentBlock); } } /* Check to see if we are at the end of the requested range. */ if ((blockOptions & BLOCK_RANGE) && (currentBlock >= blockEnd) && (contentsToDump)) { /* Don't print out message if we're doing a binary dump */ if (!(blockOptions & BLOCK_BINARY)) printf("\n*** End of Requested Range Encountered. " "Last Block Read: %d ***\n", currentBlock); contentsToDump = 0; } else currentBlock++; initialRead = 0; } } /* Consume the options and iterate through the given file, formatting as */ /* requested. */ int main(int argv, char **argc) { /* If there is a parameter list, validate the options */ unsigned int validOptions; validOptions = (argv < 2) ? 
OPT_RC_COPYRIGHT : ConsumeOptions(argv, argc); /* Display valid options if no parameters are received or invalid options */ /* where encountered */ if (validOptions != OPT_RC_VALID) DisplayOptions(validOptions); else { /* Don't dump the header if we're dumping binary pages */ if (!(blockOptions & BLOCK_BINARY)) CreateDumpFileHeader(argv, argc); /* If the user has not forced a block size, use the size of the */ /* control file data or the information from the block 0 header */ if (controlOptions) { if (!(controlOptions & CONTROL_FORCED)) blockSize = sizeof(ControlFileData); } else if (!(blockOptions & BLOCK_FORCED)) blockSize = GetBlockSize(); /* On a positive block size, allocate a local buffer to store */ /* the subsequent blocks */ if (blockSize > 0) { buffer = (char *) malloc(blockSize); if (buffer) DumpFileContents(); else { printf("\nError: Unable to create buffer of size <%d>.\n", blockSize); exitCode = 1; } } } /* Close out the file and get rid of the allocated block buffer */ if (fp) fclose(fp); if (buffer) free(buffer); exit(exitCode); } pg_filedump-REL_10_0-c0e4028/pg_filedump.h000066400000000000000000000115471317315703100201500ustar00rootroot00000000000000/* * pg_filedump.h - PostgreSQL file dump utility for dumping and * formatting heap (data), index and control files. * * Copyright (c) 2002-2010 Red Hat, Inc. * Copyright (c) 2011-2017, PostgreSQL Global Development Group * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Original Author: Patrick Macdonald */ #define FD_VERSION "10.0" /* version ID of pg_filedump */ #define FD_PG_VERSION "PostgreSQL 10.x" /* PG version it works with */ #include "postgres.h" #include #include #include "access/gin_private.h" #include "access/gist.h" #include "access/hash.h" #include "access/htup.h" #include "access/htup_details.h" #include "access/itup.h" #include "access/nbtree.h" #include "access/spgist_private.h" #include "catalog/pg_control.h" #include "storage/bufpage.h" /* Options for Block formatting operations */ static unsigned int blockOptions = 0; typedef enum blockSwitches { BLOCK_ABSOLUTE = 0x00000001, /* -a: Absolute(vs Relative) addressing */ BLOCK_BINARY = 0x00000002, /* -b: Binary dump of block */ BLOCK_FORMAT = 0x00000004, /* -f: Formatted dump of blocks / control file */ BLOCK_FORCED = 0x00000008, /* -S: Block size forced */ BLOCK_NO_INTR = 0x00000010, /* -d: Dump straight blocks */ BLOCK_RANGE = 0x00000020, /* -R: Specific block range to dump */ BLOCK_CHECKSUMS = 0x00000040, /* -k: verify block checksums */ BLOCK_DECODE = 0x00000080 /* -D: Try to decode tuples */ } blockSwitches; /* Segment-related options */ static unsigned int segmentOptions = 0; typedef enum segmentSwitches { SEGMENT_SIZE_FORCED = 0x00000001, /* -s: Segment size forced */ SEGMENT_NUMBER_FORCED = 0x00000002, /* -n: Segment number forced */ } segmentSwitches; /* -R[start]:Block range start */ static int blockStart = -1; /* -R[end]:Block range end */ static int blockEnd = -1; /* Options for Item formatting operations */ static unsigned int itemOptions = 0; typedef enum itemSwitches { ITEM_DETAIL = 0x00000001, /* -i: Display interpreted items */ ITEM_HEAP = 0x00000002, /* -y: Blocks contain HeapTuple items */ ITEM_INDEX = 0x00000004, /* -x: Blocks contain 
IndexTuple items */ ITEM_SPG_INNER = 0x00000008, /* Blocks contain SpGistInnerTuple items */ ITEM_SPG_LEAF = 0x00000010 /* Blocks contain SpGistLeafTuple items */ } itemSwitches; /* Options for Control File formatting operations */ static unsigned int controlOptions = 0; typedef enum controlSwitches { CONTROL_DUMP = 0x00000001, /* -c: Dump control file */ CONTROL_FORMAT = BLOCK_FORMAT, /* -f: Formatted dump of control file */ CONTROL_FORCED = BLOCK_FORCED /* -S: Block size forced */ } controlSwitches; /* Possible value types for the Special Section */ typedef enum specialSectionTypes { SPEC_SECT_NONE, /* No special section on block */ SPEC_SECT_SEQUENCE, /* Sequence info in special section */ SPEC_SECT_INDEX_BTREE, /* BTree index info in special section */ SPEC_SECT_INDEX_HASH, /* Hash index info in special section */ SPEC_SECT_INDEX_GIST, /* GIST index info in special section */ SPEC_SECT_INDEX_GIN, /* GIN index info in special section */ SPEC_SECT_INDEX_SPGIST, /* SP - GIST index info in special section */ SPEC_SECT_ERROR_UNKNOWN, /* Unknown error */ SPEC_SECT_ERROR_BOUNDARY /* Boundary error */ } specialSectionTypes; static unsigned int specialType = SPEC_SECT_NONE; /* Possible return codes from option validation routine. */ /* pg_filedump doesn't do much with them now but maybe in */ /* the future... 
*/ typedef enum optionReturnCodes { OPT_RC_VALID, /* All options are valid */ OPT_RC_INVALID, /* Improper option string */ OPT_RC_FILE, /* File problems */ OPT_RC_DUPLICATE, /* Duplicate option encountered */ OPT_RC_COPYRIGHT /* Copyright should be displayed */ } optionReturnCodes; /* Simple macro to check for duplicate options and then set */ /* an option flag for later consumption */ #define SET_OPTION(_x,_y,_z) if (_x & _y) \ { \ rc = OPT_RC_DUPLICATE; \ duplicateSwitch = _z; \ } \ else \ _x |= _y; #define SEQUENCE_MAGIC 0x1717 /* PostgreSQL defined magic number */ #define EOF_ENCOUNTERED (-1) /* Indicator for partial read */ #define BYTES_PER_LINE 16 /* Format the binary 16 bytes per line */ pg_filedump-REL_10_0-c0e4028/pg_lzcompress.c000066400000000000000000000541051317315703100205340ustar00rootroot00000000000000/* ---------- * pg_lzcompress.c - * * This is an implementation of LZ compression for PostgreSQL. * It uses a simple history table and generates 2-3 byte tags * capable of backward copy information for 3-273 bytes with * a max offset of 4095. * * Entry routines: * * int32 * pglz_compress(const char *source, int32 slen, char *dest, * const PGLZ_Strategy *strategy); * * source is the input data to be compressed. * * slen is the length of the input data. * * dest is the output area for the compressed result. * It must be at least as big as PGLZ_MAX_OUTPUT(slen). * * strategy is a pointer to some information controlling * the compression algorithm. If NULL, the compiled * in default strategy is used. * * The return value is the number of bytes written in the * buffer dest, or -1 if compression fails; in the latter * case the contents of dest are undefined. * * int32 * pglz_decompress(const char *source, int32 slen, char *dest, * int32 rawsize) * * source is the compressed input. * * slen is the length of the compressed input. * * dest is the area where the uncompressed data will be * written to. It is the callers responsibility to * provide enough space. 
* * The data is written to buff exactly as it was handed * to pglz_compress(). No terminating zero byte is added. * * rawsize is the length of the uncompressed data. * * The return value is the number of bytes written in the * buffer dest, or -1 if decompression fails. * * The decompression algorithm and internal data format: * * It is made with the compressed data itself. * * The data representation is easiest explained by describing * the process of decompression. * * If compressed_size == rawsize, then the data * is stored uncompressed as plain bytes. Thus, the decompressor * simply copies rawsize bytes to the destination. * * Otherwise the first byte tells what to do the next 8 times. * We call this the control byte. * * An unset bit in the control byte means, that one uncompressed * byte follows, which is copied from input to output. * * A set bit in the control byte means, that a tag of 2-3 bytes * follows. A tag contains information to copy some bytes, that * are already in the output buffer, to the current location in * the output. Let's call the three tag bytes T1, T2 and T3. The * position of the data to copy is coded as an offset from the * actual output position. * * The offset is in the upper nibble of T1 and in T2. * The length is in the lower nibble of T1. * * So the 16 bits of a 2 byte tag are coded as * * 7---T1--0 7---T2--0 * OOOO LLLL OOOO OOOO * * This limits the offset to 1-4095 (12 bits) and the length * to 3-18 (4 bits) because 3 is always added to it. To emit * a tag of 2 bytes with a length of 2 only saves one control * bit. But we lose one byte in the possible length of a tag. * * In the actual implementation, the 2 byte tag's length is * limited to 3-17, because the value 0xF in the length nibble * has special meaning. It means, that the next following * byte (T3) has to be added to the length value of 18. That * makes total limits of 1-4095 for offset and 3-273 for length. * * Now that we have successfully decoded a tag. 
We simply copy * the output that occurred 'offset' bytes back to the current * output location, for the specified 'length'. Thus, a * sequence of 200 spaces (think about bpchar fields) could be * coded in 4 bytes. One literal space and a three byte tag to * copy 199 bytes with a -1 offset. Whow - that's a compression * rate of 98%! Well, the implementation needs to save the * original data size too, so we need another 4 bytes for it * and end up with a total compression rate of 96%, what's still * worth a Whow. * * The compression algorithm * * The following uses numbers used in the default strategy. * * The compressor works best for attributes of a size between * 1K and 1M. For smaller items there's not that much chance of * redundancy in the character sequence (except for large areas * of identical bytes like trailing spaces) and for bigger ones * our 4K maximum look-back distance is too small. * * The compressor creates a table for lists of positions. * For each input position (except the last 3), a hash key is * built from the 4 next input bytes and the position remembered * in the appropriate list. Thus, the table points to linked * lists of strings that are likely to match in at least their * first 4 characters. This is done on the fly while the input * is compressed into the output area. Table entries are only * kept for the last 4096 input positions, since we cannot use * back-pointers larger than that anyway. The size of the hash * table is chosen based on the size of the input - a larger table * has a larger startup cost, as it needs to be initialized to * zero, but reduces the number of hash collisions on long inputs. * * For each byte in the input, its hash key (built from this * byte and the next 3) is used to find the appropriate list * in the table. The lists remember the positions of all bytes * that had the same hash key in the past in increasing backward * offset order.
Now for all entries in the used lists, the * match length is computed by comparing the characters from the * entry's position with the characters from the actual input * position. * * The compressor starts with a so called "good_match" of 128. * It is a "prefer speed against compression ratio" optimizer. * So if the first entry looked at already has 128 or more * matching characters, the lookup stops and that position is * used for the next tag in the output. * * For each subsequent entry in the history list, the "good_match" * is lowered by 10%. So the compressor will be more happy with * short matches the farther it has to go back in the history. * Another "speed against ratio" preference characteristic of * the algorithm. * * Thus there are 3 stop conditions for the lookup of matches: * * - a match >= good_match is found * - there are no more history entries to look at * - the next history entry is already too far back * to be coded into a tag. * * Finally the match algorithm checks that at least a match * of 3 or more bytes has been found, because that is the smallest * amount of copy information to code into a tag. If so, a tag * is emitted and all the input bytes covered by that are just * scanned for the history adds, otherwise a literal character * is emitted and only its history entry added. * * Acknowledgements: * * Many thanks to Adisak Pochanayon, whose article about SLZ * inspired me to write the PostgreSQL compression this way.
* * Jan Wieck * * Copyright (c) 1999-2017, PostgreSQL Global Development Group * * src/common/pg_lzcompress.c * ---------- */ #ifndef FRONTEND #include "postgres.h" #else #include "postgres_fe.h" #endif #include #include "common/pg_lzcompress.h" /* ---------- * Local definitions * ---------- */ #define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ #define PGLZ_HISTORY_SIZE 4096 #define PGLZ_MAX_MATCH 273 /* ---------- * PGLZ_HistEntry - * * Linked list for the backward history lookup * * All the entries sharing a hash key are linked in a doubly linked list. * This makes it easy to remove an entry when it's time to recycle it * (because it's more than 4K positions old). * ---------- */ typedef struct PGLZ_HistEntry { struct PGLZ_HistEntry *next; /* links for my hash key's list */ struct PGLZ_HistEntry *prev; int hindex; /* my current hash key */ const char *pos; /* my input position */ } PGLZ_HistEntry; /* ---------- * The provided standard strategies * ---------- */ static const PGLZ_Strategy strategy_default_data = { 32, /* Data chunks less than 32 bytes are not * compressed */ INT_MAX, /* No upper limit on what we'll try to * compress */ 25, /* Require 25% compression rate, or not worth * it */ 1024, /* Give up if no compression in the first 1KB */ 128, /* Stop history lookup if a match of 128 bytes * is found */ 10 /* Lower good match size by 10% at every loop * iteration */ }; const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data; static const PGLZ_Strategy strategy_always_data = { 0, /* Chunks of any size are compressed */ INT_MAX, 0, /* It's enough to save one single byte */ INT_MAX, /* Never give up early */ 128, /* Stop history lookup if a match of 128 bytes * is found */ 6 /* Look harder for a good match */ }; const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; /* ---------- * Statically allocated work arrays for history * ---------- */ static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; static PGLZ_HistEntry 
hist_entries[PGLZ_HISTORY_SIZE + 1]; /* * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. */ #define INVALID_ENTRY 0 #define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY]) /* ---------- * pglz_hist_idx - * * Computes the history table slot for the lookup by the next 4 * characters in the input. * * NB: because we use the next 4 characters, we are not guaranteed to * find 3-character matches; they very possibly will be in the wrong * hash list. This seems an acceptable tradeoff for spreading out the * hash keys more. * ---------- */ #define pglz_hist_idx(_s,_e, _mask) ( \ ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \ (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \ ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \ ) /* ---------- * pglz_hist_add - * * Adds a new entry to the history table. * * If _recycle is true, then we are recycling a previously used entry, * and must first delink it from its old hashcode's linked list. * * NOTE: beware of multiple evaluations of macro's arguments, and note that * _hn and _recycle are modified in the macro. * ---------- */ #define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \ do { \ int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \ int16 *__myhsp = &(_hs)[__hindex]; \ PGLZ_HistEntry *__myhe = &(_he)[_hn]; \ if (_recycle) { \ if (__myhe->prev == NULL) \ (_hs)[__myhe->hindex] = __myhe->next - (_he); \ else \ __myhe->prev->next = __myhe->next; \ if (__myhe->next != NULL) \ __myhe->next->prev = __myhe->prev; \ } \ __myhe->next = &(_he)[*__myhsp]; \ __myhe->prev = NULL; \ __myhe->hindex = __hindex; \ __myhe->pos = (_s); \ /* If there was an existing entry in this hash slot, link */ \ /* this new entry to it. However, the 0th entry in the */ \ /* entries table is unused, so we can freely scribble on it. */ \ /* So don't bother checking if the slot was used - we'll */ \ /* scribble on the unused entry if it was not, but that's */ \ /* harmless. 
Avoiding the branch in this critical path */ \ /* speeds this up a little bit. */ \ /* if (*__myhsp != INVALID_ENTRY) */ \ (_he)[(*__myhsp)].prev = __myhe; \ *__myhsp = _hn; \ if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \ (_hn) = 1; \ (_recycle) = true; \ } \ } while (0) /* ---------- * pglz_out_ctrl - * * Outputs the last and allocates a new control byte if needed. * ---------- */ #define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \ do { \ if ((__ctrl & 0xff) == 0) \ { \ *(__ctrlp) = __ctrlb; \ __ctrlp = (__buf)++; \ __ctrlb = 0; \ __ctrl = 1; \ } \ } while (0) /* ---------- * pglz_out_literal - * * Outputs a literal byte to the destination buffer including the * appropriate control bit. * ---------- */ #define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \ do { \ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ *(_buf)++ = (unsigned char)(_byte); \ _ctrl <<= 1; \ } while (0) /* ---------- * pglz_out_tag - * * Outputs a backward reference tag of 2-4 bytes (depending on * offset and length) to the destination buffer including the * appropriate control bit. * ---------- */ #define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \ do { \ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ _ctrlb |= _ctrl; \ _ctrl <<= 1; \ if (_len > 17) \ { \ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \ (_buf)[1] = (unsigned char)(((_off) & 0xff)); \ (_buf)[2] = (unsigned char)((_len) - 18); \ (_buf) += 3; \ } else { \ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \ (_buf)[1] = (unsigned char)((_off) & 0xff); \ (_buf) += 2; \ } \ } while (0) /* ---------- * pglz_find_match - * * Lookup the history table if the actual input stream matches * another sequence of characters, starting somewhere earlier * in the input buffer. 
* ---------- */ static inline int pglz_find_match(int16 *hstart, const char *input, const char *end, int *lenp, int *offp, int good_match, int good_drop, int mask) { PGLZ_HistEntry *hent; int16 hentno; int32 len = 0; int32 off = 0; /* * Traverse the linked history list until a good enough match is found. */ hentno = hstart[pglz_hist_idx(input, end, mask)]; hent = &hist_entries[hentno]; while (hent != INVALID_ENTRY_PTR) { const char *ip = input; const char *hp = hent->pos; int32 thisoff; int32 thislen; /* * Stop if the offset does not fit into our tag anymore. */ thisoff = ip - hp; if (thisoff >= 0x0fff) break; /* * Determine length of match. A better match must be larger than the * best so far. And if we already have a match of 16 or more bytes, * it's worth the call overhead to use memcmp() to check if this match * is equal for the same size. After that we must fallback to * character by character comparison to know the exact position where * the diff occurred. */ thislen = 0; if (len >= 16) { if (memcmp(ip, hp, len) == 0) { thislen = len; ip += len; hp += len; while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { thislen++; ip++; hp++; } } } else { while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { thislen++; ip++; hp++; } } /* * Remember this match as the best (if it is) */ if (thislen > len) { len = thislen; off = thisoff; } /* * Advance to the next history entry */ hent = hent->next; /* * Be happy with lesser good matches the more entries we visited. But * no point in doing calculation if we're at end of list. */ if (hent != INVALID_ENTRY_PTR) { if (len >= good_match) break; good_match -= (good_match * good_drop) / 100; } } /* * Return match information only if it results at least in one byte * reduction. */ if (len > 2) { *lenp = len; *offp = off; return 1; } return 0; } /* ---------- * pglz_compress - * * Compresses source into dest using strategy. Returns the number of * bytes written in buffer dest, or -1 if compression fails. 
* ---------- */ int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy) { unsigned char *bp = (unsigned char *) dest; unsigned char *bstart = bp; int hist_next = 1; bool hist_recycle = false; const char *dp = source; const char *dend = source + slen; unsigned char ctrl_dummy = 0; unsigned char *ctrlp = &ctrl_dummy; unsigned char ctrlb = 0; unsigned char ctrl = 0; bool found_match = false; int32 match_len; int32 match_off; int32 good_match; int32 good_drop; int32 result_size; int32 result_max; int32 need_rate; int hashsz; int mask; /* * Our fallback strategy is the default. */ if (strategy == NULL) strategy = PGLZ_strategy_default; /* * If the strategy forbids compression (at all or if source chunk size out * of range), fail. */ if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) return -1; /* * Limit the match parameters to the supported range. */ good_match = strategy->match_size_good; if (good_match > PGLZ_MAX_MATCH) good_match = PGLZ_MAX_MATCH; else if (good_match < 17) good_match = 17; good_drop = strategy->match_size_drop; if (good_drop < 0) good_drop = 0; else if (good_drop > 100) good_drop = 100; need_rate = strategy->min_comp_rate; if (need_rate < 0) need_rate = 0; else if (need_rate > 99) need_rate = 99; /* * Compute the maximum result size allowed by the strategy, namely the * input size minus the minimum wanted compression rate. This had better * be <= slen, else we might overrun the provided output buffer. */ if (slen > (INT_MAX / 100)) { /* Approximate to avoid overflow */ result_max = (slen / 100) * (100 - need_rate); } else result_max = (slen * (100 - need_rate)) / 100; /* * Experiments suggest that these hash sizes work pretty well. A large * hash table minimizes collision, but has a higher startup cost. For a * small input, the startup cost dominates. The table size must be a power * of two. 
*/ if (slen < 128) hashsz = 512; else if (slen < 256) hashsz = 1024; else if (slen < 512) hashsz = 2048; else if (slen < 1024) hashsz = 4096; else hashsz = 8192; mask = hashsz - 1; /* * Initialize the history lists to empty. We do not need to zero the * hist_entries[] array; its entries are initialized as they are used. */ memset(hist_start, 0, hashsz * sizeof(int16)); /* * Compress the source directly into the output buffer. */ while (dp < dend) { /* * If we already exceeded the maximum result size, fail. * * We check once per loop; since the loop body could emit as many as 4 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better * allow 4 slop bytes. */ if (bp - bstart >= result_max) return -1; /* * If we've emitted more than first_success_by bytes without finding * anything compressible at all, fail. This lets us fall out * reasonably quickly when looking at incompressible input (such as * pre-compressed data). */ if (!found_match && bp - bstart >= strategy->first_success_by) return -1; /* * Try to find a match in the history */ if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop, mask)) { /* * Create the tag and add history entries for all matched * characters. */ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); while (match_len--) { pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, dp, dend, mask); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } found_match = true; } else { /* * No match found. Copy one literal byte. */ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, dp, dend, mask); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } } /* * Write out the last control byte and check that we haven't overrun the * output size allowed by the strategy. 
*/ *ctrlp = ctrlb; result_size = bp - bstart; if (result_size >= result_max) return -1; /* success */ return result_size; } /* ---------- * pglz_decompress - * * Decompresses source into dest. Returns the number of bytes * decompressed in the destination buffer, or -1 if decompression * fails. * ---------- */ int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize) { const unsigned char *sp; const unsigned char *srcend; unsigned char *dp; unsigned char *destend; sp = (const unsigned char *) source; srcend = ((const unsigned char *) source) + slen; dp = (unsigned char *) dest; destend = dp + rawsize; while (sp < srcend && dp < destend) { /* * Read one control byte and process the next 8 items (or as many as * remain in the compressed input). */ unsigned char ctrl = *sp++; int ctrlc; for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++) { if (ctrl & 1) { /* * Otherwise it contains the match length minus 3 and the * upper 4 bits of the offset. The next following byte * contains the lower 8 bits of the offset. If the length is * coded as 18, another extension tag byte tells how much * longer the match really was (0-255). */ int32 len; int32 off; len = (sp[0] & 0x0f) + 3; off = ((sp[0] & 0xf0) << 4) | sp[1]; sp += 2; if (len == 18) len += *sp++; /* * Check for output buffer overrun, to ensure we don't clobber * memory in case of corrupt input. Note: we must advance dp * here to ensure the error is detected below the loop. We * don't simply put the elog inside the loop since that will * probably interfere with optimization. */ if (dp + len > destend) { dp += len; break; } /* * Now we copy the bytes specified by the tag from OUTPUT to * OUTPUT. It is dangerous and platform dependent to use * memcpy() here, because the copied areas could overlap * extremely! */ while (len--) { *dp = dp[-off]; dp++; } } else { /* * An unset control bit means LITERAL BYTE. So we just copy * one from INPUT to OUTPUT. 
*/ if (dp >= destend) /* check for buffer overrun */ break; /* do not clobber memory */ *dp++ = *sp++; } /* * Advance the control bit */ ctrl >>= 1; } } /* * Check we decompressed the right amount. */ if (dp != destend || sp != srcend) return -1; /* * That's it. */ return rawsize; } pg_filedump-REL_10_0-c0e4028/stringinfo.c000066400000000000000000000105221317315703100200220ustar00rootroot00000000000000/* * Code mostly borrowed from PostgreSQL's stringinfo.c * palloc replaced to malloc, etc. */ #include "postgres.h" #include #include #include /* 64 Kb - until pg_filedump doesn't support TOAST it doesn't need more */ #define MaxAllocSize ((Size) (64*1024)) /*------------------------- * StringInfoData holds information about an extensible string. * data is the current buffer for the string. * len is the current string length. There is guaranteed to be * a terminating '\0' at data[len], although this is not very * useful when the string holds binary data rather than text. * maxlen is the allocated size in bytes of 'data', i.e. the maximum * string size (including the terminating '\0' char) that we can * currently store in 'data' without having to reallocate * more space. We must always have maxlen > len. * cursor is initialized to zero by makeStringInfo or initStringInfo, * but is not otherwise touched by the stringinfo.c routines. * Some routines use it to scan through a StringInfo. *------------------------- */ /* * initStringInfo * * Initialize a StringInfoData struct (with previously undefined contents) * to describe an empty string. */ void initStringInfo(StringInfo str) { int size = 1024; /* initial default buffer size */ str->data = (char *) malloc(size); str->maxlen = size; resetStringInfo(str); } /* * resetStringInfo * * Reset the StringInfo: the data buffer remains valid, but its * previous content, if any, is cleared. 
*/
void
resetStringInfo(StringInfo str)
{
	str->data[0] = '\0';
	str->len = 0;
	str->cursor = 0;
}

/*
 * appendStringInfoString
 *
 * Append a null-terminated string to str.
 */
void
appendStringInfoString(StringInfo str, const char *s)
{
	appendBinaryStringInfo(str, s, strlen(s));
}

/*
 * appendBinaryStringInfo
 *
 * Append arbitrary binary data to a StringInfo, allocating more space
 * if necessary.
 */
void
appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
{
	assert(str != NULL);

	/* Make more room if needed */
	enlargeStringInfo(str, datalen);

	/* OK, append the data */
	memcpy(str->data + str->len, data, datalen);
	str->len += datalen;

	/*
	 * Keep a trailing null in place, even though it's probably useless for
	 * binary data.  (Some callers are dealing with text but call this because
	 * their input isn't null-terminated.)
	 */
	str->data[str->len] = '\0';
}

/*
 * enlargeStringInfo
 *
 * Make sure there is enough space for 'needed' more bytes
 * ('needed' does not include the terminating null).
 *
 * External callers usually need not concern themselves with this, since
 * all stringinfo.c routines do it automatically.  However, if a caller
 * knows that a StringInfo will eventually become X bytes large, it
 * can save some malloc overhead by enlarging the buffer before starting
 * to store data in it.
 *
 * A negative request, a request that would reach MaxAllocSize, or a failed
 * realloc() is fatal: a message is printed and the program exits with
 * status 1.
 */
void
enlargeStringInfo(StringInfo str, int needed)
{
	Size		newlen;
	Size		limit;
	char	   *new_data;

	limit = MaxAllocSize;

	/*
	 * Guard against out-of-range "needed" values.  Without this, we can get
	 * an overflow or infinite loop in the following.
	 */
	if (needed < 0)				/* should not happen */
	{
		printf("Error: invalid string enlargement request size: %d\n", needed);
		exit(1);
	}
	if (((Size) needed) >= (limit - (Size) str->len))
	{
		printf("Error: cannot enlarge string buffer containing %d bytes by %d more bytes.\n",
			   str->len, needed);
		exit(1);
	}

	needed += str->len + 1;		/* total space required now */

	/* Because of the above test, we now have needed <= limit */

	if (needed <= str->maxlen)
		return;					/* got enough space already */

	/*
	 * We don't want to allocate just a little more space with each append;
	 * for efficiency, double the buffer size each time it overflows.
	 * Actually, we might need to more than double it if 'needed' is big...
	 */
	newlen = 2 * str->maxlen;
	while ((Size) needed > newlen)	/* needed is known to be positive here */
		newlen = 2 * newlen;

	/*
	 * Clamp to the limit in case we went past it.  Note we are assuming here
	 * that limit <= INT_MAX/2, else the above loop could overflow.  We will
	 * still have newlen >= needed.
	 */
	if (newlen > limit)
		newlen = limit;

	/*
	 * Keep the old pointer until realloc() has succeeded; on failure the
	 * original buffer is still allocated and is released before exiting.
	 */
	new_data = (char *) realloc(str->data, newlen);
	if (new_data == NULL)
	{
		free(str->data);
		printf("Error: realloc() failed!\n");
		exit(1);
	}
	str->data = new_data;

	str->maxlen = newlen;
}