Commit e28c430e authored by Mark Cave-Ayland's avatar Mark Cave-Ayland
Browse files

Nearly finish work on the new parser API. The key part of this patch is to...

Nearly finish work on the new parser API. The key part of this patch is to alter the behaviour of the parser so that instead of generating errors directly, it returns an error code, error location and an error message. Hence the caller is now in charge of the behaviour when parsing invalid geometries, and so can ignore errors or proceed onto the next geometry if required. The regression test change is due to a change in an error message, since the error is now returned from the unparser before it even gets to GEOS.


git-svn-id: http://svn.osgeo.org/postgis/trunk@3106 b70326c6-7e19-0410-871a-916f4a2858ee
parent 74de73fb
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -1123,8 +1123,21 @@ typedef struct struct_lwgeom_parser_result
{
	uchar *serialized_lwgeom;	/* Pointer to serialized LWGEOM */
	int size;			/* Size of serialized LWGEOM in bytes */
	const char *message;		/* Error/warning message */
	int errlocation;		/* Location of error */
} LWGEOM_PARSER_RESULT;

/*
 * Parser error codes (these must match the message array in lwgparse.c).
 *
 * Each value is used as an index into that message array, so the order here
 * and the order of the messages have to be kept in step. Codes start at 1.
 */
#define PARSER_ERROR_MOREPOINTS 	1	/* geometry requires more points */
#define PARSER_ERROR_ODDPOINTS		2	/* geometry must have an odd number of points */
#define PARSER_ERROR_UNCLOSED		3 	/* geometry contains non-closed rings */
#define PARSER_ERROR_MIXDIMS		4	/* can not mix dimensionality in a geometry */
#define PARSER_ERROR_INVALIDGEOM	5	/* parse error - invalid geometry */
#define PARSER_ERROR_INVALIDWKBTYPE	6	/* invalid WKB type */


/*
 * Unparser result structure: returns the result of attempting to convert LWGEOM to (E)WKT/(E)WKB 
 */
@@ -1132,8 +1145,18 @@ typedef struct struct_lwgeom_unparser_result
{
	char *wkoutput;			/* Pointer to WKT or WKB output */
	int size;			/* Size of serialized LWGEOM in bytes */
	const char *message;		/* Error/warning message */
	int errlocation;		/* Location of error */
} LWGEOM_UNPARSER_RESULT;

/*
 * Unparser error codes (these must match the message array in lwgunparse.c).
 *
 * Each value is used as an index into that message array, so the order here
 * and the order of the messages have to be kept in step. Codes start at 1.
 */
#define UNPARSER_ERROR_MOREPOINTS 	1	/* geometry requires more points */
#define UNPARSER_ERROR_ODDPOINTS	2	/* geometry must have an odd number of points */
#define UNPARSER_ERROR_UNCLOSED		3 	/* geometry contains non-closed rings */


/* Parser access routines */
extern char *lwgeom_to_ewkt(LWGEOM *lwgeom, int flags);
extern char *lwgeom_to_hexwkb(LWGEOM *lwgeom, int flags, unsigned int byteorder);
+72 −32
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@

#include "liblwgeom.h"
#include "wktparse.h"
#include "wktparse.tab.h"

/*
 * To get byte order
@@ -43,7 +44,7 @@ typedef void (*read_col_func)(const char **f);

int srid=-1;

static int ferror_occured;
static int parser_ferror_occured;
static allocator local_malloc;
static report_error error_func;

@@ -99,13 +100,18 @@ tuple* free_list=0;
 */
int current_parser_check_flags;

/*
 * Parser current instance result structure - the result structure being used for the current parse
 */
LWGEOM_PARSER_RESULT *current_lwg_parser_result;


/* Parser state flags - these are set automatically by the parser */
int minpoints;
int checkclosed;

/*
 * This inicates if the number of points in the geometry is required to
 * This indicates if the number of points in the geometry is required to
 * be odd (one) or even (zero, currently not enforced) or whatever (-one)
 */
int isodd;
@@ -113,12 +119,50 @@ double *first_point=NULL;
double *last_point=NULL;


/*
 * Parser error messages, indexed by the PARSER_ERROR constants.
 *
 * IMPORTANT: Make sure the order of these messages matches the PARSER_ERROR constants in liblwgeom.h!
 * The 0th element is an empty placeholder and is never used, because the
 * PARSER_ERROR constants start at 1. (The status kept in
 * parser_ferror_occured is the negated constant, so reported codes start
 * from -1.)
 */

const char *parser_error_messages[] = {
	"",
	"geometry requires more points",
	"geometry must have an odd number of points",
	"geometry contains non-closed rings",
	"can not mix dimensionality in a geometry",
	"parse error - invalid geometry",
	"invalid WKB type"
};

/*
 * Record a WKT parse error in the current parser result.
 *
 * errcode is one of the PARSER_ERROR constants. Only the first error of a
 * parse is kept: once parser_ferror_occured is non-zero, later invocations
 * do nothing. On the first error the macro stores:
 *   - the negated error code in parser_ferror_occured,
 *   - the matching entry of parser_error_messages in the result's message,
 *   - lwg_parse_yylloc.last_column in the result's errlocation.
 *
 * The argument is evaluated exactly once. The expansion deliberately has no
 * trailing semicolon, so the macro behaves as a single statement and is safe
 * in an unbraced if/else; call it as LWGEOM_WKT_PARSER_ERROR(code);
 */
#define LWGEOM_WKT_PARSER_ERROR(errcode) \
	do { \
		if (!parser_ferror_occured) { \
			const int lwg_errcode_ = (errcode); \
			parser_ferror_occured = -lwg_errcode_; \
			current_lwg_parser_result->message = parser_error_messages[lwg_errcode_]; \
			current_lwg_parser_result->errlocation = lwg_parse_yylloc.last_column; \
		} \
	} while (0)


/*
 * Record a WKB parse error in the current parser result.
 *
 * errcode is one of the PARSER_ERROR constants. Only the first error of a
 * parse is kept: once parser_ferror_occured is non-zero, later invocations
 * do nothing. On the first error the macro stores the negated error code in
 * parser_ferror_occured and the matching entry of parser_error_messages in
 * the result's message.
 *
 * NOTE: unlike the WKT variant this macro does not write errlocation; the
 * position is tracked by strhex_readbyte, which advances
 * lwg_parse_yylloc.last_column as bytes are consumed.
 * NOTE(review): confirm that the position is copied into the result's
 * errlocation somewhere for WKB input.
 *
 * The argument is evaluated exactly once. The expansion deliberately has no
 * trailing semicolon, so the macro behaves as a single statement and is safe
 * in an unbraced if/else; call it as LWGEOM_WKB_PARSER_ERROR(code);
 */
#define LWGEOM_WKB_PARSER_ERROR(errcode) \
	do { \
		if (!parser_ferror_occured) { \
			const int lwg_errcode_ = (errcode); \
			parser_ferror_occured = -lwg_errcode_; \
			current_lwg_parser_result->message = parser_error_messages[lwg_errcode_]; \
		} \
	} while (0)


/* External functions */
extern void init_parser(const char *);

/* Prototypes */
tuple* alloc_tuple(output_func of,size_t size);
static void error(const char* err);
void free_tuple(tuple* to_free);
void inc_num(void);
void alloc_stack_tuple(int type,output_func of,size_t size);
@@ -222,13 +266,6 @@ alloc_tuple(output_func of,size_t size)
	return ret;
}

static void
error(const char* err)
{
	error_func(err);
	ferror_occured=1;
}

void
free_tuple(tuple* to_free)
{
@@ -283,14 +320,14 @@ popc(void)
	/* If the minimum point check has been enabled, perform it */
	if (current_parser_check_flags & PARSER_CHECK_MINPOINTS) {
		if ( the_geom.stack->uu.nn.num < minpoints){
			error("geometry requires more points");
			LWGEOM_WKT_PARSER_ERROR(PARSER_ERROR_MOREPOINTS);
		}
	}

	/* If the odd number point check has been enabled, perform it */
	if (current_parser_check_flags & PARSER_CHECK_ODD) {
        	if(isodd != -1 && the_geom.stack->uu.nn.num % 2 != isodd) {
                	error("geometry must have an odd number of points");
                	LWGEOM_WKT_PARSER_ERROR(PARSER_ERROR_ODDPOINTS);
        	}
	}

@@ -300,7 +337,7 @@ popc(void)
			if ( memcmp(first_point, last_point,
				sizeof(double)*the_geom.ndims) )
			{
				error("geometry contains non-closed rings");
				LWGEOM_WKT_PARSER_ERROR(PARSER_ERROR_UNCLOSED);
			}
		}	
	}
@@ -316,7 +353,7 @@ check_dims(int num)

	if( the_geom.ndims != num){
		if (the_geom.ndims) {
			error("Can not mix dimensionality in a geometry");
			LWGEOM_WKT_PARSER_ERROR(PARSER_ERROR_MIXDIMS);
		} else {

                        LWDEBUGF(3, "check_dims: setting dim %d", num);
@@ -627,7 +664,7 @@ alloc_polygon(void)
	else
		alloc_stack_tuple(POLYGONTYPE, write_type,1);

	minpoints=3;
	minpoints=4;
	checkclosed=1;
        isodd=-1;

@@ -639,7 +676,7 @@ alloc_curvepolygon(void)
        LWDEBUG(2, "alloc_curvepolygon called.");

        alloc_stack_tuple(CURVEPOLYTYPE, write_type, 1);
        minpoints=3;
        minpoints=4;
        checkclosed=1;
        isodd=-1;
}
@@ -774,8 +811,7 @@ lwg_parse_yynotice(char* s)
/*
 * Parser error hook: flags the input as an invalid geometry.
 *
 * The failure is recorded through LWGEOM_WKT_PARSER_ERROR with
 * PARSER_ERROR_INVALIDGEOM rather than being raised directly, so the caller
 * of the parser decides what to do with it. The text passed in s is not
 * used. Always returns 1.
 */
int
lwg_parse_yyerror(char* s)
{
	(void) s;	/* the supplied text is intentionally ignored */

	LWGEOM_WKT_PARSER_ERROR(PARSER_ERROR_INVALIDGEOM);
	return 1;
}

@@ -809,12 +845,18 @@ uchar
strhex_readbyte(const char* in)
{
	if ( *in == 0 ){
		if ( ! ferror_occured){
			error("invalid wkb");
		if ( ! parser_ferror_occured){
			LWGEOM_WKB_PARSER_ERROR(PARSER_ERROR_INVALIDGEOM);
		}
		return 0;
	}

	if (!parser_ferror_occured) {
		lwg_parse_yylloc.last_column++;
		return to_hex[(int)*in]<<4 | to_hex[(int)*(in+1)];
	} else {
		return 0;
	}
}

uchar
@@ -919,10 +961,10 @@ read_wkb_polygon(const char **b)

	/* Read through each ORDINATE_ARRAY in turn */
	while(cnt--){
		if ( ferror_occured )	return;
		if ( parser_ferror_occured )	return;

		/* Things to check for POLYGON ORDINATE_ARRAYs */
		minpoints=3;
		minpoints=4;
		checkclosed=1;
		isodd=-1;

@@ -965,7 +1007,7 @@ read_wkb_ordinate_array(const char **b)
	alloc_counter();

	while(cnt--){
		if ( ferror_occured )	return;
		if ( parser_ferror_occured )	return;
		read_wkb_point(b);
	}

@@ -981,7 +1023,7 @@ read_collection(const char **b, read_col_func f)
	alloc_counter();

	while(cnt--){
		if ( ferror_occured )	return;
		if ( parser_ferror_occured )	return;
		f(b);
	}

@@ -1007,7 +1049,7 @@ parse_wkb(const char **b)
	type = read_wkb_int(b);

	/* quick exit on error */
	if ( ferror_occured ) return;
	if ( parser_ferror_occured ) return;

	the_geom.ndims=2;
	if (type & WKBZOFFSET)
@@ -1102,7 +1144,7 @@ parse_wkb(const char **b)
			break;

		default:
			error("Invalid type in wbk");
			LWGEOM_WKB_PARSER_ERROR(PARSER_ERROR_INVALIDWKBTYPE);
	}

	the_geom.from_lwgi=0;
@@ -1130,9 +1172,10 @@ parse_it(LWGEOM_PARSER_RESULT *lwg_parser_result, const char *geometry, int flag
	local_malloc = allocfunc;
	error_func=errfunc;

	ferror_occured = 0;
	parser_ferror_occured = 0;

	/* Set up the initial parser flags and empty the return struct */
	current_lwg_parser_result = lwg_parser_result;
	current_parser_check_flags = flags;
	lwg_parser_result->serialized_lwgeom = NULL;
	lwg_parser_result->size = 0;
@@ -1143,13 +1186,10 @@ parse_it(LWGEOM_PARSER_RESULT *lwg_parser_result, const char *geometry, int flag

	close_parser();

	if (ferror_occured)
		return 0;

	/* Return the parsed geometry */
	make_serialized_lwgeom(lwg_parser_result);

	return -1;
	return parser_ferror_occured;
}

int
+91 −31
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ uchar* output_wkb(uchar* geom);

/*-- Globals ----------------------------------------------- */

static int unparser_ferror_occured;
static int dims;
static allocator local_malloc;
static freeor local_free;
@@ -81,9 +82,43 @@ void (*write_wkb_bytes)(uchar* ptr,unsigned int cnt,size_t size);
int current_unparser_check_flags;

/*
 * Unparser result structure
 * Unparser current instance result structure - the result structure being used for the current unparse
 */
LWGEOM_UNPARSER_RESULT *unparser_result;
LWGEOM_UNPARSER_RESULT *current_lwg_unparser_result;

/*
 * Unparser error messages, indexed by the UNPARSER_ERROR constants.
 *
 * IMPORTANT: Make sure the order of these messages matches the UNPARSER_ERROR constants in liblwgeom.h!
 * The 0th element is an empty placeholder and is never used, because the
 * UNPARSER_ERROR constants start at 1. (The status kept in
 * unparser_ferror_occured is the negated constant, so reported codes start
 * from -1.)
 */

const char *unparser_error_messages[] = {
        "",
        "geometry requires more points",
	"geometry must have an odd number of points",
        "geometry contains non-closed rings"
};

/*
 * Record a WKT unparse error in the current unparser result.
 *
 * errcode is one of the UNPARSER_ERROR constants. Only the first error of an
 * unparse is kept: once unparser_ferror_occured is non-zero, later
 * invocations do nothing. On the first error the macro stores:
 *   - the negated error code in unparser_ferror_occured,
 *   - the matching entry of unparser_error_messages in the result's message,
 *   - the current output offset (out_pos - out_start) in the result's
 *     errlocation.
 *
 * The argument is evaluated exactly once. The expansion deliberately has no
 * trailing semicolon, so the macro behaves as a single statement and is safe
 * in an unbraced if/else; call it as LWGEOM_WKT_UNPARSER_ERROR(code);
 */
#define LWGEOM_WKT_UNPARSER_ERROR(errcode) \
	do { \
		if (!unparser_ferror_occured) { \
			const int lwg_errcode_ = (errcode); \
			unparser_ferror_occured = -lwg_errcode_; \
			current_lwg_unparser_result->message = unparser_error_messages[lwg_errcode_]; \
			current_lwg_unparser_result->errlocation = (out_pos - out_start); \
		} \
	} while (0)

/*
 * Record a WKB unparse error in the current unparser result.
 *
 * errcode is one of the UNPARSER_ERROR constants. Only the first error of an
 * unparse is kept: once unparser_ferror_occured is non-zero, later
 * invocations do nothing. On the first error the macro stores:
 *   - the negated error code in unparser_ferror_occured,
 *   - the matching entry of unparser_error_messages in the result's message,
 *   - the current output offset (out_pos - out_start) in the result's
 *     errlocation.
 *
 * The argument is evaluated exactly once. The expansion deliberately has no
 * trailing semicolon, so the macro behaves as a single statement and is safe
 * in an unbraced if/else; call it as LWGEOM_WKB_UNPARSER_ERROR(code);
 */
#define LWGEOM_WKB_UNPARSER_ERROR(errcode) \
	do { \
		if (!unparser_ferror_occured) { \
			const int lwg_errcode_ = (errcode); \
			unparser_ferror_occured = -lwg_errcode_; \
			current_lwg_unparser_result->message = unparser_error_messages[lwg_errcode_]; \
			current_lwg_unparser_result->errlocation = (out_pos - out_start); \
		} \
	} while (0)

/*---------------------------------------------------------- */

@@ -241,10 +276,7 @@ uchar *
output_line_collection(uchar* geom,outfunc func,int supress)
{
	int cnt = read_int(&geom);

	/* Ensure that LINESTRING has a minimum of 2 points */
	if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && cnt < 2)
		lwerror("geometry requires more points");
	int orig_cnt = cnt;

	if ( cnt == 0 ){
		write_str(" EMPTY");
@@ -259,6 +291,11 @@ output_line_collection(uchar* geom,outfunc func,int supress)
		}
		write_str(")");
	}

	/* Ensure that LINESTRING has a minimum of 2 points */
	if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 2)
		LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);

	return geom;
}

@@ -272,6 +309,7 @@ output_polygon_ring_collection(uchar* geom,outfunc func,int supress)
        double last_point[dims];

	int cnt = read_int(&geom);
	int orig_cnt = cnt;
	if ( cnt == 0 ){
		write_str(" EMPTY");
	}
@@ -309,8 +347,11 @@ output_polygon_ring_collection(uchar* geom,outfunc func,int supress)
        	/* Check if they are the same... */
        	if (memcmp(&first_point, &last_point, sizeof(double) * dims) &&
			(current_unparser_check_flags & PARSER_CHECK_CLOSURE))
                	lwerror("geometry contains non-closed rings");
                	LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_UNCLOSED);	

		/* Ensure that POLYGON has a minimum of 4 points */
        	if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 4)
                	LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);
	}
	return geom;
}
@@ -320,14 +361,7 @@ uchar *
output_curve_collection(uchar* geom,outfunc func,int supress)
{
	int cnt = read_int(&geom);

	/* Ensure that a CIRCULARSTRING has a minimum of 3 points */
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && cnt < 3)
                lwerror("geometry requires more points");

	/* Ensure that a CIRCULARSTRING has an odd number of points */
        if ((current_unparser_check_flags & PARSER_CHECK_ODD) && cnt % 2 != 1)
                lwerror("geometry must have an odd number of points");
	int orig_cnt = cnt;

	if ( cnt == 0 ){
		write_str(" EMPTY");
@@ -342,6 +376,17 @@ output_curve_collection(uchar* geom,outfunc func,int supress)
		}
		write_str(")");
	}

	/* Ensure that a CIRCULARSTRING has a minimum of 3 points */
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 3) {
                LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);
	}

	/* Ensure that a CIRCULARSTRING has an odd number of points */
        if ((current_unparser_check_flags & PARSER_CHECK_ODD) && orig_cnt % 2 != 1) {
                LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_ODDPOINTS);
	}

	return geom;
}

@@ -579,11 +624,12 @@ unparse_WKT(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allo
		return 0;

	/* Set up the initial parser flags and empty the return struct */
	current_lwg_unparser_result = lwg_unparser_result;
        current_unparser_check_flags = flags;
	lwg_unparser_result->wkoutput = NULL;
        lwg_unparser_result->size = 0;

	unparser_result = lwg_unparser_result;
	unparser_ferror_occured = 0;
	local_malloc=alloc;
	local_free=free;
	len = 128;
@@ -596,7 +642,7 @@ unparse_WKT(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allo
	lwg_unparser_result->wkoutput = out_start;
	lwg_unparser_result->size = strlen(out_start);

	return -1;
	return unparser_ferror_occured;
}

static char outchr[]={"0123456789ABCDEF" };
@@ -707,15 +753,18 @@ uchar *
output_wkb_line_collection(uchar* geom,outwkbfunc func)
{
	int cnt = read_int(&geom);
	int orig_cnt = cnt;

	LWDEBUGF(2, "output_wkb_line_collection: %d iterations loop", cnt);

	/* Ensure that LINESTRING has a minimum of 2 points */
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && cnt < 2)
                lwerror("geometry requires more points");

	write_wkb_int(cnt);
	while(cnt--) geom=func(geom);

	/* Ensure that LINESTRING has a minimum of 2 points */
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 2) {
		LWGEOM_WKB_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);
	}

	return geom;
}

@@ -729,6 +778,7 @@ output_wkb_polygon_ring_collection(uchar* geom,outwkbfunc func)
	double last_point[dims];

	int cnt = read_int(&geom);
	int orig_cnt = cnt;

	LWDEBUGF(2, "output_wkb_polygon_ring_collection: %d iterations loop", cnt);

@@ -758,8 +808,13 @@ output_wkb_polygon_ring_collection(uchar* geom,outwkbfunc func)

	/* Check if they are the same... */
	if (memcmp(&first_point, &last_point, sizeof(double) * dims) &&
		(current_unparser_check_flags & PARSER_CHECK_CLOSURE))
		lwerror("geometry contains non-closed rings");
		(current_unparser_check_flags & PARSER_CHECK_CLOSURE)) {
		LWGEOM_WKB_UNPARSER_ERROR(UNPARSER_ERROR_UNCLOSED);
	}

	/* Ensure that POLYGON has a minimum of 4 points */
	if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 4)
		LWGEOM_WKT_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);

	return geom;
}
@@ -778,19 +833,23 @@ uchar *
output_wkb_curve_collection(uchar* geom,outwkbfunc func)
{
	int cnt = read_int(&geom);
	int orig_cnt = cnt;

	LWDEBUGF(2, "output_wkb_curve_collection: %d iterations loop", cnt);

	write_wkb_int(cnt);
	while(cnt--) geom=func(geom);

	/* Ensure that a CIRCULARSTRING has a minimum of 3 points */
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && cnt < 3)
                lwerror("geometry requires more points");
        if ((current_unparser_check_flags & PARSER_CHECK_MINPOINTS) && orig_cnt < 3) {
		LWGEOM_WKB_UNPARSER_ERROR(UNPARSER_ERROR_MOREPOINTS);
	}

	/* Ensure that a CIRCULARSTRING has an odd number of points */
        if ((current_unparser_check_flags & PARSER_CHECK_ODD) && cnt % 2 != 1)
                lwerror("geometry must have an odd number of points");
        if ((current_unparser_check_flags & PARSER_CHECK_ODD) && orig_cnt % 2 != 1) {
		LWGEOM_WKB_UNPARSER_ERROR(UNPARSER_ERROR_ODDPOINTS);
	}

	write_wkb_int(cnt);
	while(cnt--) geom=func(geom);
	return geom;
}

@@ -893,11 +952,12 @@ unparse_WKB(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allo
		return 0;

	/* Set up the initial parser flags and empty the return struct */
	current_lwg_unparser_result = lwg_unparser_result;
        current_unparser_check_flags = flags;
	lwg_unparser_result->wkoutput = NULL;
	lwg_unparser_result->size = 0;

	unparser_result = lwg_unparser_result;
	unparser_ferror_occured = 0;
	local_malloc=alloc;
	local_free=free;
	len = 128;
@@ -936,7 +996,7 @@ unparse_WKB(LWGEOM_UNPARSER_RESULT *lwg_unparser_result, uchar* serialized, allo
	lwg_unparser_result->wkoutput = out_start;
	lwg_unparser_result->size = (out_pos-out_start);

	return -1;	
	return unparser_ferror_occured;	
}


+5 −0
Original line number Diff line number Diff line
@@ -23,12 +23,17 @@ typedef struct serialized_lwgeom {
typedef struct struct_lwgeom_parser_result
{
        SERIALIZED_LWGEOM *serialized_lwgeom;
        int size;
        const char *message;
        int errlocation;
} LWGEOM_PARSER_RESULT;

typedef struct struct_lwgeom_unparser_result
{
        char *wkoutput;
        int size;
	const char *message;
        int errlocation;
} LWGEOM_UNPARSER_RESULT;
#endif
typedef void* (*allocator)(size_t size);
+2 −0
Original line number Diff line number Diff line
@@ -2949,6 +2949,8 @@ Datum LWGEOM_asEWKT(PG_FUNCTION_ARGS)
	lwgeom = (PG_LWGEOM *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0));

	result = serialized_lwgeom_to_ewkt(&lwg_unparser_result, SERIALIZED_FORM(lwgeom), PARSER_CHECK_ALL);
	if (result)
		PG_UNPARSER_ERROR(lwg_unparser_result);

#if 0
	semicolonLoc = strchr(lwg_unparser_result.wkb,';');
Loading