Zlib-based field compression (think gzip): sqz_z(string, <level>) - compress string exp_z(string) - expand string Requires zlib - http://www.gzip.org/zlib/
/* * * Mysql user-defined-functions, sqz_z() and exp_z(), for zlib compression. * Zlib is available from http://www.gzip.org/zlib/ * * Written by Lester Hightower, hightowe _@_ 10east.com * * Note: Do not try to store compressed data in char or varchar columns. * Those column types remove trailing spaces from the data, which ruins * your data when it is zipped and the compressed data ends with * spaces, which happens quite often. You must use one of the BLOB * or TEXT types to store gzipped data. * * - 12/19/2001: + added the optional <level> parameter to sqz_z() * v. 0.1 + eliminated the define MAX_UNCOMPRESSED_SIZE in exchange * for mallocing something that is likely large enough for * the exp_z() function, via MAX_COMPRESSION_RATIO * * - 08/08/2003 + Worked with John R. Smith, Jr. on udf_lzo and in so * v. 0.2 doing discovered the problem with this, and his, UDF. * Both were related to memory allocation, and we fixed * both in a similar way. We also chose to rename our * functions to something that is consistent between us: * sqz_<method>(), exp_<method>() for squeeze/expand. * * - 08/14/2003 + Added MAX_UNCOMPRESS_BUF_RETRIES define and related * v. 0.3 code to exp_z() to allow for some automated memory * buffer growth above MAX_COMPRESSION_RATIO. * * This software is public domain and comes with NO WARRANTY of any kind. * * This code was written using: * - udf_example1.cc as the basis. * udf_example1.cc ships with the mysqld source code. * - the zlib documentation found at www.info-zip.org. * * The only known limitation is that the largest uncompressed size possible, * by default, is 10 times the compressed data's size, no more than a 10 to 1 * compression ratio. This is probably reasonable for most users. * This can be changed via the MAX_COMPRESSION_RATIO define. * * USAGE: * sqz_z(string, <level>) * - string is typically a quoted constant, or a column containing text. * - <level> is optional, and is the desired compression level, 0-9. * The compression level coresponds to the levels in gzip (zlib). * exp_z(string) * - string is typically a column containing gzipped data. * * EXAMPLES: * Load a new table full of gz'ed text: * mysql> insert into events_raw_gz \ * select event_id,sqz_z(event_rawmsg) from events_raw; * ******************************************************************************* ** ** Syntax for the new commands are: ** create function <function_name> returns {string|real|integer} ** soname <name_of_shared_library> ** drop function <function_name> ** ** CREATE FUNCTION sqz_z RETURNS STRING SONAME "libmysqld_udf_zlib.so"; ** CREATE FUNCTION exp_z RETURNS STRING SONAME "libmysqld_udf_zlib.so"; ** ** After this the functions will work exactly like native MySQL functions. ** Functions should be created only once. ** ** The functions can be deleted by: ** ** DROP FUNCTION sqz_z; ** DROP FUNCTION exp_z; ** ** The CREATE FUNCTION and DROP FUNCTION update the mysql.func table. ** All Active functions will be reloaded on every restart of server ** (if --skip-grant-tables is not given) ** */
#ifdef STANDARD #include <stdio.h> #include <string.h> #else #include <my_global.h> #include <my_sys.h> #include <m_string.h> #endif #include <mysql.h> #include <m_ctype.h> #include <zlib.h> #include <pthread.h>
#ifdef HAVE_DLOPEN
/* ********************************************************************** * Defines specific to this UDF *************************************** ********************************************************************** * * If you work with a wide mix of very large and very small data, in * the same compressed column of a data table, you may want to set the * UDF_DEBUG_LEVEL to 1 and watch the logs sent to mysql.err, and from * that data determine a more appropriate, probably higher, value for * INITIAL_RAM_ALLOC for your application. * * As of version 0.3, the _true_ "max compression ratio" that this * code will handle without throwing an error is the value of * MAX_COMPRESSION_RATIO * MAX_UNCOMPRESS_BUF_RETRIES, which as shipped, * is 4 * 10 == 40. If a lot of your data achieves better than a 4 to 1 * compressions ratio, you will get better performance from this code * by increasing the value of MAX_COMPRESSION_RATIO, and you can then * correspondingly decrease MAX_UNCOMPRESS_BUF_RETRIES. * * With UDF_DEBUG_LEVEL set to 1, here is a copy/paste from the * mysql.err log, just as an example: * * exp_z_init() InitID=136855900, malloc 524288 bytes * exp_z() InitID=136855900, Z_BUF_ERROR - increasing compression_ratio to 8 * exp_z() InitID=136855900, Allocating more memory: 524288 -> 1031096 * * Notice that a 4 to 1 compressions ratio was not large enough for 1 of * the records in that data set, but an 8 to 1 was. If you run this * code with UDF_DEBUG_LEVEL=1 and see lots of Z_BUF_ERROR messages then * you should increase the value of MAX_COMPRESSION_RATIO to compensate. * */ #define UDF_DEBUG_LEVEL 0 #define INITIAL_RAM_ALLOC 524288 // 1/2 MB #define MAX_COMPRESSION_RATIO 4 #define MAX_UNCOMPRESS_BUF_RETRIES 10
/* These must be right or mysqld will not find the symbol! */ extern "C" { my_bool sqz_z_init(UDF_INIT *initid, UDF_ARGS *args, char *message); void sqz_z_deinit(UDF_INIT *initid); char *sqz_z(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error);
my_bool exp_z_init(UDF_INIT *initid, UDF_ARGS *args, char *message); void exp_z_deinit(UDF_INIT *initid); char *exp_z(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); }
/************************************************************************* * The sqz_z* functions. **************************************************************************/
my_bool sqz_z_init(UDF_INIT *initid, UDF_ARGS *args, char *message) { /* Insure that we have the correct number and types of arguments. */ if ( (args->arg_count < 1) || (args->arg_count > 2) || (args->arg_count == 1 && args->arg_type[0] != STRING_RESULT) || (args->arg_count == 2 && (args->arg_type[0] != STRING_RESULT || args->arg_type[1] != INT_RESULT) ) ) { strcpy(message,"Wrong arguments to sqz_z(string,<level>)"); return 1; } int in_len=INITIAL_RAM_ALLOC; if (args->lengths[0] > 0) { initid->ptr=(char *) malloc( (size_t) in_len); if (initid->ptr == 0) { strcpy(message,"No memory available for sqz_z(string,<level>)"); return 1; } } /* Set the max out length to let mysqld what might be coming */ /* Note that his value is not stet in stone, and will often */ /* change later in this code. */ initid->max_length=in_len; if (UDF_DEBUG_LEVEL > 0) fprintf(stderr, "sqz_z_init() InitID=%d, malloc %d bytes\n", (int)initid, in_len); return 0; }
void sqz_z_deinit(UDF_INIT *initid) { if (initid->ptr) { free(initid->ptr); } }
char *sqz_z(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) { char *str=args->args[0]; if (!str) // Null argument { *is_null=1; return 0; } if (args->lengths[0] == 0) { // Empty string argument *is_null=1; return 0; }
/* * Determine the compression level to use, 0-9; default == 6. * There is no need to check arg_type for INT_RESULT because it * is done in sqz_z_init() */ int comp_level=6; if (args->arg_count == 2) { int desired_level=(int)*args->args[1]; if (desired_level >=0 && desired_level < 10) { comp_level=desired_level; } }
/* * Before we get too far we need to make sure that we have * allocated enough memory. This is very important as * mysqld does a poor job of giving us the truth in the * udf_init() function _if_ the data we are receiving is * coming from another function (function nesting). In that * case it seems to give us a hard-coded 8Kb. */ unsigned int in_len=args->lengths[0]; /* the zlib manual says to allocate 110% + 12 bytes */ unsigned int mem_buf=uint((in_len * 1.1) + 12); if (mem_buf > initid->max_length) { free(initid->ptr); initid->ptr = (char *) malloc(mem_buf); if (UDF_DEBUG_LEVEL > 0) fprintf(stderr, "sqz_z() InitID=%d, Allocating more memory: %d -> %d\n", (int)initid, initid->max_length, mem_buf); initid->max_length=mem_buf; }
/* * Do the zlib compress and return the results. * Note: setting *error=0 on the error conditions below returns the * error as the value -- good for debugging. Setting *error=1 * makes myqsld stop calling the function, and instead to just place * NULL in this column for the remainder of the rows to be processed, * a behavior which I dislike. */ uLongf destlen=(uLongf) (args->lengths[0] * 1.1) + 12; if (UDF_DEBUG_LEVEL > 9) fprintf(stderr, "About to call compress2() for initid=%d, str=%d, destlen=%d\n", (int)initid, (int)str, (int)destlen); int res=compress2((Bytef*)initid->ptr,&destlen, (Bytef*)str,args->lengths[0],comp_level); if (UDF_DEBUG_LEVEL > 9) fprintf(stderr, "Finished compress2() call for initid=%d, str=%d, destlen=%d\n", (int)initid, (int)str, (int)destlen); if (res == Z_OK) { *length= (ulong) destlen; return initid->ptr; } else if (res == Z_BUF_ERROR) { strcpy(result,"UDF Error: sqz_z failed on compress2() with Z_BUF_ERROR."); *error=0; return result; } else if (res == Z_MEM_ERROR) { strcpy(result,"UDF Error: sqz_z failed on compress2() with Z_MEM_ERROR."); *error=0; return result; } else if (res == Z_STREAM_ERROR) { strcpy(result,"UDF Error: sqz_z failed on compress2() with Z_STREAM_ERROR."); *error=0; return result; } else { strcpy(result,"UDF Error: sqz_z failed on compress()."); *error=0; return result; } }
/************************************************************************* * The exp_z* functions. **************************************************************************/
my_bool exp_z_init(UDF_INIT *initid, UDF_ARGS *args, char *message) { if (args->arg_count != 1 || args->arg_type[0] != STRING_RESULT) { strcpy(message,"Wrong arguments to sqz_z; needs single string."); return 1; } /* Allocate the uncompress memory, if we need it. */ int max_size=INITIAL_RAM_ALLOC; if (max_size > 0) { initid->ptr=(char *) malloc( (size_t) max_size); if (initid->ptr == 0) { strcpy(message,"No memory available for exp_z(string)"); return 1; } } /* Set the max_length (output) to let mysqld what might be coming */ initid->max_length=max_size; if (UDF_DEBUG_LEVEL > 0) fprintf(stderr, "exp_z_init() InitID=%d, malloc %d bytes\n", (int)initid, max_size);
/* Init zlib */ z_stream zls; zls.zalloc = (alloc_func)malloc; zls.zfree = (free_func)free; return 0; }
void exp_z_deinit(UDF_INIT *initid) { if (initid->ptr) { free(initid->ptr); } }
char *exp_z(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) { char *str=args->args[0]; if (!str) // Null argument { *is_null=1; return 0; } if (args->lengths[0] == 0) { // Empty string argument *is_null=1; return 0; }
/* * Do the zlib uncompress and return the results. * Note: setting *error=0 on the error conditions below returns the * error as the value -- good for debugging. Setting *error=1 * makes myqsld stop calling the function, and instead to just place * NULL in this column for the remainder of the rows to be processed, * a behavior which I dislike. */ /* This assumes we will never see more than a X to 1 compression ratio. */ unsigned int compression_ratio = MAX_COMPRESSION_RATIO; unsigned int uncompress_tries=0; while (1) { /* * Before we get too far we need to make sure that we have * allocated enough memory. This is very important as * mysqld does a poor job of giving us the truth in the * udf_init() function _if_ the data we are receiving is * coming from another function (function nesting). In that * case it seems to give us a hard-coded 8Kb. */ unsigned int in_len=args->lengths[0]; unsigned int mem_buf=in_len * compression_ratio; if (mem_buf > initid->max_length) { free(initid->ptr); initid->ptr = (char *) malloc(mem_buf); if (UDF_DEBUG_LEVEL > 0) fprintf(stderr, "exp_z() InitID=%d, Allocating more memory: %d -> %d\n", (int)initid, initid->max_length, mem_buf); initid->max_length=mem_buf; }
/* * Do the uncompress() and exit, except in the case of Z_BUF_ERROR, * where we will increase the value of compression_ratio and retry * up to MAX_UNCOMPRESS_BUF_RETRIES times. */ uLongf destlen=(uLongf) initid->max_length; int res=uncompress((Bytef*)initid->ptr,&destlen,(Bytef*)str, args->lengths[0]); if (res == Z_OK) { *length= (ulong) destlen; return initid->ptr; } else if (res == Z_DATA_ERROR) { char msg[]="UDF Error: exp_z failed on uncompress() with Z_DATA_ERROR."; strcpy(result,msg); *error=1; return result; } else if (res == Z_BUF_ERROR) { uncompress_tries++; if (uncompress_tries < MAX_UNCOMPRESS_BUF_RETRIES) { compression_ratio = MAX_COMPRESSION_RATIO * (uncompress_tries + 1); if (UDF_DEBUG_LEVEL > 0) fprintf(stderr, "exp_z() InitID=%d, Z_BUF_ERROR - increasing compression_ratio to %d\n", (int)initid, compression_ratio); continue; } else { sprintf(result,"UDF Error: exp_z failed on uncompress() with Z_BUF_ERROR on a %d byte buffer.", (int)destlen); *error=1; return result; } } else { strcpy(result,"UDF Error: exp_z failed on uncompress()."); *error=1; return result; } } }
#endif /* HAVE_DLOPEN */
=======================================
Linux 용 Make File
=======================================
# # Note: this Makefile was designed for RH Linux 7.3, but should work on # many platforms if you will just alter the paths in IFLAGS and # LFLAGS to match your platform, and possible remove -D__GNUC__ # from CFLAGS if your platform does not use GNU libc. #
CC=gcc CFLAGS=-shared -Wall -O -D__GNUC__ IFLAGS=-I /usr/include -I /usr/include/mysql/ LFLAGS=-L /usr/lib/mysql -lmysqlclient -L /usr/lib -lz VERSION=0.3
SHELL=/bin/sh NOOP = $(SHELL) -c true RM_RF = rm -rf PREOP = @$(NOOP) POSTOP = @$(NOOP) TO_UNIX = @$(NOOP)
SRC=mysqld_udf_zlib.cc OBJ=libmysql_udf_zlib.so SQZ_FUNC=sqz_z EXP_FUNC=exp_z
all: $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(SRC) -o $(OBJ)
install: all cp $(OBJ) /usr/lib ldconfig
load : install echo "CREATE FUNCTION $(SQZ_FUNC) RETURNS STRING SONAME \"$(OBJ)\"" | mysql -f -u root -p echo "CREATE FUNCTION $(EXP_FUNC) RETURNS STRING SONAME \"$(OBJ)\"" | mysql -f -u root -p
drop: echo "DROP FUNCTION $(SQZ_FUNC);" | mysql -f -u root -p echo "DROP FUNCTION $(EXP_FUNC);" | mysql -f -u root -p
clean: -rm -f *.so
|