Commit 15cd5944 authored by Grégory Mantelet's avatar Grégory Mantelet
Browse files

[ADQL] Add to the parser a function attempting to quickly fix an ADQL query.

This new function - ADQLParser.tryQuickFix(...) - fixes the most common issues
with ADQL queries:

- replace Unicode confusable characters by their ASCII/UTF-8 version,
- double-quote SQL reserved words/terms (e.g. `public`, `year`, `date`),
- double-quote ADQL function names used as a column name/alias (e.g.
  `distance`, `min`, `avg`),
- double-quote invalid regular identifiers (e.g. `_RAJ2000`, `2mass`).

The last point is far from being perfect but should work at least for
identifiers starting with a digit or an underscore, or for identifiers including
one of the following characters: `?`, `!`, `$`, `@`, `#`, `{`, `}`, `[`, `]`,
`~`, `^` and the backtick character.

It should also be noted that double-quoting a column/table name will make it
case-sensitive. Consequently, it is possible that the query still does not pass
even after the double-quote operation; the case would then have to be checked by
the user.

Finally, there is no attempt to fix column and table names (i.e. case
sensitivity and/or typos) using tables/columns list/metadata. That could be a
possible evolution of this function or an additional feature to implement in the
parser.
parent 03d4dc14
Loading
Loading
Loading
Loading
+1233 −922

File changed.

Preview size limit exceeded, changes collapsed.

+12 −12
Original line number Diff line number Diff line
@@ -189,19 +189,19 @@ public interface ADQLParserConstants {
  /** RegularExpression Id. */
  int STRING_LITERAL = 93;
  /** RegularExpression Id. */
  int DELIMITED_IDENTIFIER = 96;
  int SCIENTIFIC_NUMBER = 94;
  /** RegularExpression Id. */
  int REGULAR_IDENTIFIER = 97;
  int UNSIGNED_FLOAT = 95;
  /** RegularExpression Id. */
  int Letter = 98;
  int UNSIGNED_INTEGER = 96;
  /** RegularExpression Id. */
  int SCIENTIFIC_NUMBER = 99;
  int DIGIT = 97;
  /** RegularExpression Id. */
  int UNSIGNED_FLOAT = 100;
  int DELIMITED_IDENTIFIER = 100;
  /** RegularExpression Id. */
  int UNSIGNED_INTEGER = 101;
  int REGULAR_IDENTIFIER_CANDIDATE = 101;
  /** RegularExpression Id. */
  int DIGIT = 102;
  int Letter = 102;

  /** Lexical state. */
  int DEFAULT = 0;
@@ -306,15 +306,15 @@ public interface ADQLParserConstants {
    "\"\\\'\"",
    "<token of kind 92>",
    "\"\\\'\"",
    "\"\\\"\"",
    "<token of kind 95>",
    "\"\\\"\"",
    "<REGULAR_IDENTIFIER>",
    "<Letter>",
    "<SCIENTIFIC_NUMBER>",
    "<UNSIGNED_FLOAT>",
    "<UNSIGNED_INTEGER>",
    "<DIGIT>",
    "\"\\\"\"",
    "<token of kind 99>",
    "\"\\\"\"",
    "<REGULAR_IDENTIFIER_CANDIDATE>",
    "<Letter>",
  };

}
+2198 −1609

File changed.

Preview size limit exceeded, changes collapsed.

+24 −8
Original line number Diff line number Diff line
@@ -15,6 +15,11 @@
 *     - addition of a HINT in the error message when an ADQL or SQL reserved
 *       word is at the origin of the error (see initialise(...))
 *
 * Modified by Gr&eacute;gory Mantelet (CDS), on March 2019
 * Modifications:
 *     - addition of a constructor with a TokenMgrError which adds a piece of
 *       advice to fix the token issue (see buildExpandedMessage(...))
 *
 * /!\ DO NOT RE-GENERATE THIS FILE /!\
 * In case of re-generation, replace it by ParseException.java.backup (but maybe
 * after a diff in case of significant modifications have been done by a new
@@ -81,6 +86,17 @@ public class ParseException extends Exception {
		position = errorPosition;
	}

	/**
	 * Builds a ParseException from an error raised by the token manager.
	 *
	 * <p>The message of this exception is the one returned by
	 * {@link #buildExpandedMessage(TokenMgrError)} for the given error, and its
	 * position is a {@link TextPosition} created with the line and the column
	 * reported by the given error.</p>
	 *
	 * @param err	The token manager error to convert into a ParseException.
	 */
	public ParseException(TokenMgrError err){
		this(buildExpandedMessage(err), new TextPosition(err.getErrorLine(), err.getErrorColumn()));
	}

	/**
	 * Appends to the message of the given token manager error a piece of
	 * advice about its most likely cause.
	 *
	 * <p>Two hints are possible:</p>
	 * <ul>
	 * 	<li>if the message mentions <code>&lt;EOF&gt;</code>: a string between
	 * 	    single or double quotes which is never closed,</li>
	 * 	<li>otherwise: a non-ASCII/UTF-8 character.</li>
	 * </ul>
	 *
	 * @param err	The token manager error whose message must be expanded.
	 *
	 * @return	The original message (or an empty string if there is none)
	 *        	followed by the hint.
	 */
	private static final String buildExpandedMessage(final TokenMgrError err){
		// Throwable.getMessage() may return null: fall back on an empty
		// message instead of failing with a NullPointerException.
		final String rawMessage = err.getMessage();
		final String message = (rawMessage == null) ? "" : rawMessage;

		// contains(...) also detects "<EOF>" at the very beginning of the
		// message (an occurrence at index 0 was missed by indexOf(...) > 0).
		if (message.contains("<EOF>"))
			return message + "! Possible cause: a string between single or double quotes which is never closed (solution: well...just close it!).";
		else
			return message + "! Possible cause: a non-ASCII/UTF-8 character (solution: remove/replace it).";
	}

	/**
	 * This is the last token that has been consumed successfully.  If
	 * this object has been created due to a parse error, the token
+24 −8
Original line number Diff line number Diff line
@@ -15,6 +15,11 @@
 *     - addition of a HINT in the error message when an ADQL or SQL reserved
 *       word is at the origin of the error (see initialise(...))
 *
 * Modified by Gr&eacute;gory Mantelet (CDS), on March 2019
 * Modifications:
 *     - addition of a constructor with a TokenMgrError which adds a piece of
 *       advice to fix the token issue (see buildExpandedMessage(...))
 *
 * /!\ DO NOT RE-GENERATE THIS FILE /!\
 * In case of re-generation, replace it by ParseException.java.backup (but maybe
 * after a diff in case of significant modifications have been done by a new
@@ -81,6 +86,17 @@ public class ParseException extends Exception {
		position = errorPosition;
	}

	/**
	 * Builds a ParseException from an error raised by the token manager.
	 *
	 * <p>The message of this exception is the one returned by
	 * {@link #buildExpandedMessage(TokenMgrError)} for the given error, and its
	 * position is a {@link TextPosition} created with the line and the column
	 * reported by the given error.</p>
	 *
	 * @param err	The token manager error to convert into a ParseException.
	 */
	public ParseException(TokenMgrError err){
		this(buildExpandedMessage(err), new TextPosition(err.getErrorLine(), err.getErrorColumn()));
	}

	/**
	 * Appends to the message of the given token manager error a piece of
	 * advice about its most likely cause.
	 *
	 * <p>Two hints are possible:</p>
	 * <ul>
	 * 	<li>if the message mentions <code>&lt;EOF&gt;</code>: a string between
	 * 	    single or double quotes which is never closed,</li>
	 * 	<li>otherwise: a non-ASCII/UTF-8 character.</li>
	 * </ul>
	 *
	 * @param err	The token manager error whose message must be expanded.
	 *
	 * @return	The original message (or an empty string if there is none)
	 *        	followed by the hint.
	 */
	private static final String buildExpandedMessage(final TokenMgrError err){
		// Throwable.getMessage() may return null: fall back on an empty
		// message instead of failing with a NullPointerException.
		final String rawMessage = err.getMessage();
		final String message = (rawMessage == null) ? "" : rawMessage;

		// contains(...) also detects "<EOF>" at the very beginning of the
		// message (an occurrence at index 0 was missed by indexOf(...) > 0).
		if (message.contains("<EOF>"))
			return message + "! Possible cause: a string between single or double quotes which is never closed (solution: well...just close it!).";
		else
			return message + "! Possible cause: a non-ASCII/UTF-8 character (solution: remove/replace it).";
	}

	/**
	 * This is the last token that has been consumed successfully.  If
	 * this object has been created due to a parse error, the token
Loading