Newer
Older
/* Generated By:JavaCC: Do not edit this line. ADQLParser.java */
package adql.parser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Stack;
import java.util.Vector;
import adql.db.exception.UnresolvedIdentifiersException;
import adql.parser.ADQLQueryFactory.JoinType;
import adql.parser.IdentifierItems.IdentifierItem;
gmantele
committed
import adql.query.ADQLOrder;
import adql.query.ADQLQuery;
import adql.query.ClauseADQL;
import adql.query.ClauseConstraints;
import adql.query.ClauseSelect;
import adql.query.SelectAllColumns;
import adql.query.SelectItem;
import adql.query.TextPosition;
import adql.query.constraint.ADQLConstraint;
import adql.query.constraint.Between;
import adql.query.constraint.Comparison;
import adql.query.constraint.ComparisonOperator;
import adql.query.constraint.ConstraintsGroup;
import adql.query.constraint.Exists;
import adql.query.constraint.In;
import adql.query.constraint.IsNull;
import adql.query.constraint.NotConstraint;
import adql.query.from.ADQLJoin;
gmantele
committed
import adql.query.from.FromContent;
import adql.query.operand.ADQLColumn;
import adql.query.operand.ADQLOperand;
import adql.query.operand.Concatenation;
import adql.query.operand.NegativeOperand;
import adql.query.operand.NumericConstant;
import adql.query.operand.Operation;
import adql.query.operand.OperationType;
import adql.query.operand.StringConstant;
import adql.query.operand.WrappedOperand;
import adql.query.operand.function.ADQLFunction;
import adql.query.operand.function.MathFunction;
import adql.query.operand.function.MathFunctionType;
import adql.query.operand.function.SQLFunction;
import adql.query.operand.function.SQLFunctionType;
import adql.query.operand.function.UserDefinedFunction;
import adql.query.operand.function.geometry.GeometryFunction;
import adql.query.operand.function.geometry.GeometryFunction.GeometryValue;
import adql.query.operand.function.geometry.PointFunction;
import adql.translator.PostgreSQLTranslator;
import adql.translator.TranslationException;
/**
* Parses an ADQL query thanks to the {@link ADQLParser#Query()} function.
*
* <p>
* This parser is able, thanks to a {@link QueryChecker} object, to check each
* {@link ADQLQuery} just after its generation. It could be used to check the
* consistency between the ADQL query to parse and the "database" on which the
* query must be executed. By default, there is no {@link QueryChecker}. Thus
* you must extend {@link QueryChecker} to check semantically all generated
* ADQLQuery objects.
* </p>
*
* <p>
* To create an object representation of the given ADQL query, this parser uses
* a {@link ADQLQueryFactory} object. So if you want customize some object
* (ie. CONTAINS) of this representation you just have to extend the
* corresponding default object (ie. ContainsFunction) and to extend the
* corresponding function of {@link ADQLQueryFactory}
* (ie. createContains(...)).
* </p>
*
* <p>Here are the key functions to use:</p>
* <ul>
* <li>{@link #parseQuery(java.lang.String)} (or any of its alternatives)
* to parse an input ADQL query String and get its corresponding ADQL tree
* </li>
* <li>{@link #tryQuickFix(java.lang.String)} to try fixing the most common
* issues with ADQL queries (e.g. Unicode confusable characters,
* unescaped ADQL identifiers, SQL reserved keywords, ...)</li>
* </ul>
*
* <p><b><u>WARNING:</u>
* To modify this class it's strongly encouraged to modify the .jj file in the
* section between <i>PARSER_BEGIN</i> and <i>PARSER_END</i> and to re-compile
* it with JavaCC.
* </b></p>
*
* @see QueryChecker
* @see ADQLQueryFactory
*
* @author Grégory Mantelet (CDS;ARI)
* @version 1.5-2 (10/2020)
*/
public class ADQLParser implements ADQLParserConstants {
/** Tools to build the object representation of the ADQL query.
* Initialised with a default {@link ADQLQueryFactory}; the constructors and
* {@link #setQueryFactory(ADQLQueryFactory)} never store <i>null</i> here. */
private ADQLQueryFactory queryFactory = new ADQLQueryFactory();
/** The stack of queries (because there may be some sub-queries).
* It is cleared at the beginning of every <code>parseQuery(...)</code>
* call. */
private Stack<ADQLQuery> stackQuery = new Stack<ADQLQuery>();
/** The object representation of the ADQL query to parse.
* (ONLY USED DURING THE PARSING, else it is always <i>null</i>).
* It is reset to <i>null</i> at the beginning of every
* <code>parseQuery(...)</code> call. */
private ADQLQuery query = null;
/** Checks each {@link ADQLQuery} (sub-query or not) just after their
* generation. <i>null</i> by default, and may be set to <i>null</i> through
* the constructors or {@link #setQueryChecker(QueryChecker)}. */
private QueryChecker queryChecker = null;
/** The first token of a table/column name. This token is extracted by
* {@link #Identifier()}. */
private Token currentIdentifierToken = null;
/**
* Builds an ADQL parser without a query to parse.
*/
Grégory Mantelet
committed
public ADQLParser() {
	// No query yet: start from an empty input (it can be replaced later
	// with one of the ReInit functions or parseQuery(...) alternatives).
	this(new java.io.ByteArrayInputStream(new byte[0]));
	// Parser tracing is off by default:
	setDebug(false);
}
/**
* Builds an ADQL parser without a query to parse but with a
* {@link QueryChecker} and a {@link ADQLQueryFactory}.
*
* @param checker The object to use to check each {@link ADQLQuery}.
* @param factory The object to use to build an object representation of
* the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(QueryChecker checker, ADQLQueryFactory factory) {
	this();
	// The checker is stored as given, even if null:
	this.queryChecker = checker;
	// A null factory leaves the default one in place:
	if (factory != null) {
		this.queryFactory = factory;
	}
}
/**
* Builds an ADQL parser without a query to parse but with a
* {@link QueryChecker}.
*
* @param checker The object to use to check each {@link ADQLQuery}.
*/
Grégory Mantelet
committed
public ADQLParser(QueryChecker checker) {
	// No custom factory: the default one is kept by the delegate.
	this(checker, (ADQLQueryFactory)null);
}
/**
* Builds an ADQL parser without a query to parse but with a
* {@link ADQLQueryFactory}.
*
* @param factory The object to use to build an object representation of
* the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(ADQLQueryFactory factory) {
// No checker is given; the explicit cast on null states which two-argument
// constructor is targeted: (QueryChecker, ADQLQueryFactory).
this((QueryChecker)null, factory);
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param checker The object to use to check each {@link ADQLQuery}.
* @param factory The object to use to build an object representation of
* the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, QueryChecker checker, ADQLQueryFactory factory) {
	this(stream);
	// Parser tracing is off by default (a single call is enough):
	setDebug(false);
	// The checker is stored as given, even if null:
	queryChecker = checker;
	// A null factory leaves the default one in place:
	if (factory != null)
		queryFactory = factory;
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param checker The object to use to check each {@link ADQLQuery}.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, QueryChecker checker) {
	// No custom factory: the default one is kept by the delegate.
	this(stream, checker, (ADQLQueryFactory)null);
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param factory The object to use to build an object representation of
* the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, ADQLQueryFactory factory) {
// No checker is given; the explicit cast on null states that the
// (InputStream, QueryChecker, ADQLQueryFactory) constructor is targeted.
this(stream, (QueryChecker)null, factory);
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param encoding The supplied encoding.
* @param checker The object to use to check each {@link ADQLQuery}.
* @param factory The object to use to build an object representation
* of the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, String encoding, QueryChecker checker, ADQLQueryFactory factory) {
	this(stream, encoding);
	// Parser tracing is off by default:
	setDebug(false);
	// The checker is stored as given, even if null:
	this.queryChecker = checker;
	// A null factory leaves the default one in place:
	if (factory != null) {
		this.queryFactory = factory;
	}
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param encoding The supplied encoding.
* @param checker The object to use to check each {@link ADQLQuery}.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, String encoding, QueryChecker checker) {
	// No custom factory: the default one is kept by the delegate.
	this(stream, encoding, checker, (ADQLQueryFactory)null);
}
/**
* Builds a parser with a stream containing the query to parse.
*
* @param stream The stream in which the ADQL query to parse is given.
* @param encoding The supplied encoding.
* @param factory The object to use to build an object representation
* of the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.InputStream stream, String encoding, ADQLQueryFactory factory) {
	// No checker: generated queries will not be checked.
	this(stream, encoding, (QueryChecker)null, factory);
}
/**
* Builds a parser with a reader containing the query to parse.
*
* @param reader The reader in which the ADQL query to parse is given.
* @param checker The object to use to check each {@link ADQLQuery}.
* @param factory The object to use to build an object representation
* of the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.Reader reader, QueryChecker checker, ADQLQueryFactory factory) {
	this(reader);
	// Parser tracing is off by default (a single call is enough):
	setDebug(false);
	// The checker is stored as given, even if null:
	queryChecker = checker;
	// A null factory leaves the default one in place:
	if (factory != null)
		queryFactory = factory;
}
/**
* Builds a parser with a reader containing the query to parse.
*
* @param reader The reader in which the ADQL query to parse is given.
* @param checker The object to use to check each {@link ADQLQuery}.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.Reader reader, QueryChecker checker) {
	// No custom factory: the default one is kept by the delegate.
	this(reader, checker, (ADQLQueryFactory)null);
}
/**
* Builds a parser with a reader containing the query to parse.
*
* @param reader The reader in which the ADQL query to parse is given.
* @param factory The object to use to build an object representation
* of the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(java.io.Reader reader, ADQLQueryFactory factory) {
	// No checker: generated queries will not be checked.
	this(reader, (QueryChecker)null, factory);
}
/**
* Builds a parser with another token manager.
*
* @param tm The manager which associates a token to a numeric code.
* @param checker The object to use to check each {@link ADQLQuery }.
* @param factory The object to use to build an object representation
* of the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(ADQLParserTokenManager tm, QueryChecker checker, ADQLQueryFactory factory) {
	this(tm);
	// Parser tracing is off by default (a single call is enough):
	setDebug(false);
	// The checker is stored as given, even if null:
	queryChecker = checker;
	// A null factory leaves the default one in place:
	if (factory != null)
		queryFactory = factory;
}
/**
* Builds a parser with another token manager.
*
* @param tm The manager which associates a token to a numeric code.
* @param checker The object to use to check each {@link ADQLQuery}.
*/
Grégory Mantelet
committed
public ADQLParser(ADQLParserTokenManager tm, QueryChecker checker) {
	// No custom factory: the default one is kept by the delegate.
	this(tm, checker, (ADQLQueryFactory)null);
}
/**
* Builds a parser with another token manager.
*
* @param tm The manager which associates a token to a numeric code.
* @param factory The object to use to build an object representation of
* the given ADQL query.
*/
Grégory Mantelet
committed
public ADQLParser(ADQLParserTokenManager tm, ADQLQueryFactory factory) {
	// No checker: generated queries will not be checked.
	this(tm, (QueryChecker)null, factory);
}
/* ADDITIONAL GETTERS & SETTERS */
Grégory Mantelet
committed
public final void setDebug(boolean debug) {
	// Switch the generated parser's tracing on or off.
	if (!debug) {
		disable_tracing();
		return;
	}
	enable_tracing();
}
Grégory Mantelet
committed
public final QueryChecker getQueryChecker() {
	// May be null: no checker is set by default.
	return this.queryChecker;
}
Grégory Mantelet
committed
public final void setQueryChecker(QueryChecker checker) {
	// The checker is stored as given, even if null.
	this.queryChecker = checker;
}
Grégory Mantelet
committed
public final ADQLQueryFactory getQueryFactory() {
	// Factory currently used to build the query's object representation.
	return this.queryFactory;
}
Grégory Mantelet
committed
public final void setQueryFactory(ADQLQueryFactory factory) {
	// A null factory resets this parser to a fresh default factory.
	if (factory == null) {
		this.queryFactory = new ADQLQueryFactory();
	} else {
		this.queryFactory = factory;
	}
}
/* EXCEPTION HELPER FUNCTION */
Grégory Mantelet
committed
private final ParseException generateParseException(Exception ex) {
if (!(ex instanceof ParseException)) {
ParseException pex = new ParseException("[" + ex.getClass().getName() + "] " + ex.getMessage());
pex.setStackTrace(ex.getStackTrace());
return pex;
Grégory Mantelet
committed
} else
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
return (ParseException)ex;
}
/* QUERY PARSING FUNCTIONS */
/**
* Tell whether the given string is a valid ADQL regular identifier.
*
* <p>
* According to the ADQL-2.0's BNF, a regular identifier (i.e. not delimited
* ; not between double quotes) must be a letter followed by a letter, digit
* or underscore. So, the following regular expression:
* </p>
* <pre>[a-zA-Z]+[a-zA-Z0-9_]*</pre>
*
* <p>This is what this function tests on the given string.</p>
*
* @param idCandidate The string to test.
*
* @return <code>true</code> if the given string is a valid regular
* identifier,
* <code>false</code> otherwise.
*
* @see #testRegularIdentifier(adql.parser.Token)
*
* @since 1.5
*/
Grégory Mantelet
committed
public final boolean isRegularIdentifier(final String idCandidate) {
	// A missing candidate cannot be a valid identifier (and must not
	// raise a NullPointerException):
	if (idCandidate == null)
		return false;
	// At least one letter, then any mix of letters, digits and underscores:
	return idCandidate.matches("[a-zA-Z]+[a-zA-Z0-9_]*");
}
/**
* Test the given token as an ADQL's regular identifier.
*
* <p>
* This function uses {@link #isRegularIdentifier(java.lang.String)} to
* test the given token's image. If the test fails, a
* {@link adql.parser.ParseException} is thrown.
* </p>
*
* @param token The token to test.
*
* @throws ParseException If the given token is not a valid ADQL regular
* identifier.
*
* @see #isRegularIdentifier(java.lang.String)
*
* @since 1.5
*/
Grégory Mantelet
committed
public final void testRegularIdentifier(final Token token) throws ParseException {
	// Nothing to report for a valid regular identifier:
	if (isRegularIdentifier(token.image))
		return;

	// Otherwise, raise an error located at the token's position:
	final String message = "Invalid ADQL regular identifier: \u005c"" + token.image + "\u005c"! If it aims to be a column/table name/alias, you should write it between double quotes.";
	throw new ParseException(message, new TextPosition(token));
}
/**
* Parses the query given at the creation of this parser or in the
* <i>ReInit</i> functions.
*
* @return The object representation of the given ADQL query.
*
* @throws ParseException If there is at least one syntactic error.
*
* @see ADQLParser#Query()
*/
Grégory Mantelet
committed
public final ADQLQuery parseQuery() throws ParseException {
stackQuery.clear();
query = null;
Grégory Mantelet
committed
try {
Grégory Mantelet
committed
} catch(TokenMgrError tme) {
throw new ParseException(tme);
}
}
/**
* Parses the query given in parameter.
*
* @param q The ADQL query to parse.
*
* @return The object representation of the given ADQL query.
*
* @throws ParseException If there is at least one syntactic error.
*
* @see ADQLParser#ReInit(java.io.InputStream)
* @see ADQLParser#setDebug(boolean)
* @see ADQLParser#Query()
*/
Grégory Mantelet
committed
public final ADQLQuery parseQuery(String q) throws ParseException {
stackQuery.clear();
query = null;
ReInit(new java.io.ByteArrayInputStream(q.getBytes()));
Grégory Mantelet
committed
try {
Grégory Mantelet
committed
} catch(TokenMgrError tme) {
throw new ParseException(tme);
}
}
/**
* Parses the query contained in the stream given in parameter.
*
* @param stream The stream which contains the ADQL query to parse.
*
* @return The object representation of the given ADQL query.
*
* @throws ParseException If there is at least one syntactic error.
*
* @see ADQLParser#ReInit(java.io.InputStream)
* @see ADQLParser#setDebug(boolean)
* @see ADQLParser#Query()
*/
Grégory Mantelet
committed
public final ADQLQuery parseQuery(java.io.InputStream stream) throws ParseException {
stackQuery.clear();
query = null;
ReInit(stream);
Grégory Mantelet
committed
try {
Grégory Mantelet
committed
} catch(TokenMgrError tme) {
throw new ParseException(tme);
}
}
/* CORRECTION SUGGESTION */
/**
* Try fixing tokens/terms of the input ADQL query.
*
* <p>
* <b>This function does not try to fix syntactical or semantical errors.</b>
* It just try to fix the most common issues in ADQL queries, such as:
* </p>
* <ul>
* <li>some Unicode characters confusable with ASCII characters (like a
* space, a dash, ...) ; this function replace them by their ASCII
* alternative,</li>
* <li>any of the following are double quoted:
* <ul>
* <li>non regular ADQL identifiers
* (e.g. <code>_RAJ2000</code>),</li>
* <li>ADQL function names used as identifiers
* (e.g. <code>distance</code>)</li>
* <li>and SQL reserved keywords
* (e.g. <code>public</code>).</li>
* </ul>
* </li>
* </ul>
*
* <p><i><b>Note 1:</b>
* The given stream is NOT closed by this function even if the EOF is
* reached. It is the responsibility of the caller to close it.
* </i></p>
*
* <p><i><b>Note 2:</b>
* This function does not use any instance variable of this parser
* (especially the InputStream or Reader provided at initialisation or
* ReInit).
* </i></p>
*
* @param input Stream containing the input ADQL query to fix.
*
* @return The suggested correction of the input ADQL query.
*
* @throws java.io.IOException If there is any error while reading from the
* given input stream.
* @throws ParseException If any unrecognised character is encountered,
* or if anything else prevented the tokenization
* of some characters/words/terms.
*
* @see #tryQuickFix(java.lang.String)
*
* @since 1.5
*/
Grégory Mantelet
committed
public final String tryQuickFix(final java.io.InputStream input) throws java.io.IOException, ParseException {
// Fetch everything into a single string:
StringBuffer buf = new StringBuffer();
byte[] cBuf = new byte[1024];
int nbChar;
Grégory Mantelet
committed
while((nbChar = input.read(cBuf)) > -1) {
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
buf.append(new String(cBuf, 0, nbChar));
}
// Convert the buffer into a String and now try to fix it:
return tryQuickFix(buf.toString());
}
/**
* Try fixing tokens/terms of the given ADQL query.
*
* <p>
* <b>This function does not try to fix syntactical or semantical errors.</b>
* It just try to fix the most common issues in ADQL queries, such as:
* </p>
* <ul>
* <li>some Unicode characters confusable with ASCII characters (like a
* space, a dash, ...) ; this function replace them by their ASCII
* alternative,</li>
* <li>any of the following are double quoted:
* <ul>
* <li>non regular ADQL identifiers
* (e.g. <code>_RAJ2000</code>),</li>
* <li>ADQL function names used as identifiers
* (e.g. <code>distance</code>)</li>
* <li>and SQL reserved keywords
* (e.g. <code>public</code>).</li>
* </ul>
* </li>
* </ul>
*
* <p><i><b>Note:</b>
* This function does not use any instance variable of this parser
* (especially the InputStream or Reader provided at initialisation or
* ReInit).
* </i></p>
*
* @param adqlQuery The input ADQL query to fix.
*
* @return The suggested correction of the given ADQL query.
*
* @throws ParseException If any unrecognised character is encountered,
* or if anything else prevented the tokenization
* of some characters/words/terms.
*
* @since 1.5
*/
Grégory Mantelet
committed
public String tryQuickFix(String adqlQuery) throws ParseException {
StringBuffer suggestedQuery = new StringBuffer();
Grégory Mantelet
committed
// 1. Replace all Unicode confusable characters:
adqlQuery = replaceUnicodeConfusables(adqlQuery);
Grégory Mantelet
committed
/* 1.bis. Normalise new lines and tabulations
* (to simplify the column counting): */
adqlQuery = adqlQuery.replaceAll("(\u005cr\u005cn|\u005cr|\u005cn)", System.getProperty("line.separator")).replaceAll("\u005ct", " ");
Grégory Mantelet
committed
// 2. Analyse the query token by token:
ADQLParserTokenManager parser = new ADQLParserTokenManager(new SimpleCharStream(new java.io.ByteArrayInputStream(adqlQuery.getBytes())));
Grégory Mantelet
committed
final String[] lines = adqlQuery.split(System.getProperty("line.separator"));
Grégory Mantelet
committed
Grégory Mantelet
committed
try {
String suggestedToken;
int lastLine = 1, lastCol = 1;
Grégory Mantelet
committed
Token token = null, nextToken = parser.getNextToken();
// for all tokens until the EOF or EOQ:
Grégory Mantelet
committed
do {
// get the next token:
token = nextToken;
nextToken = (isEnd(token) ? null : parser.getNextToken());
Grégory Mantelet
committed
// 3. Double quote any suspect token:
Grégory Mantelet
committed
if (mustEscape(token, nextToken)) {
suggestedToken = "\u005c"" + token.image + "\u005c"";
Grégory Mantelet
committed
} else
suggestedToken = token.image;
Grégory Mantelet
committed
/* 4. Append all space characters (and comments) before the
* token: */
/* same line, just get the space characters between the last
* token and the one to append: */
Grégory Mantelet
committed
if (lastLine == token.beginLine) {
if (token.kind == ADQLParserConstants.EOF)
suggestedQuery.append(lines[lastLine - 1].substring(lastCol - 1));
else
suggestedQuery.append(lines[lastLine - 1].substring(lastCol - 1, token.beginColumn - (isEnd(token) ? 0 : 1)));
lastCol = token.endColumn + 1;
}
// not the same line...
Grégory Mantelet
committed
else {
/* append all remaining space characters until the position
* of the token to append: */
Grégory Mantelet
committed
do {
suggestedQuery.append(lines[lastLine - 1].substring(lastCol - 1)).append('\u005cn');
lastLine++;
lastCol = 1;
Grégory Mantelet
committed
} while(lastLine < token.beginLine);
/* if there are still space characters before the token,
* append them as well: */
if (lastCol < token.beginColumn)
suggestedQuery.append(lines[lastLine - 1].substring(lastCol - 1, token.beginColumn - 1));
// finally, set the correct column position:
lastCol = token.endColumn + 1;
}
Grégory Mantelet
committed
// 5. Append the suggested token:
suggestedQuery.append(suggestedToken);
Grégory Mantelet
committed
Grégory Mantelet
committed
} while(!isEnd(token));
Grégory Mantelet
committed
Grégory Mantelet
committed
} catch(TokenMgrError err) {
// wrap such errors and propagate them:
throw new ParseException(err);
}
Grégory Mantelet
committed
return suggestedQuery.toString();
}
/**
* All of the most common Unicode confusable characters and their
* ASCII/UTF-8 alternative.
*
* <p>
* Keys of this map represent the ASCII character while the values are the
* regular expression for all possible Unicode alternatives.
* </p>
*
* <p><i><b>Note:</b>
* All of them have been listed using
* <a href="https://unicode.org/cldr/utility/confusables.jsp">Unicode Utilities: Confusables</a>.
* </i></p>
*
* @since 1.5
*/
protected final static java.util.Map<String, String> REGEX_UNICODE_CONFUSABLES = new java.util.HashMap<String, String>(10);
/** Regular expression matching all Unicode alternatives for <code>-</code>.
* The character class also contains the ASCII <code>-</code> itself.
* @since 1.5 */
protected final static String REGEX_DASH = "[-\u02d7\u06d4\u2010\u2011\u2012\u2013\u2043\u2212\u2796\u2cba\ufe58\u2014\u2015\u207b\u208b\u0096\u058a\ufe63\uff0d]";
/** Regular expression matching all Unicode alternatives for <code>_</code>.
* The character class also contains the ASCII <code>_</code> itself.
* @since 1.5 */
protected final static String REGEX_UNDERSCORE = "[_\u07fa\ufe4d\ufe4e\ufe4f]";
/** Regular expression matching all Unicode alternatives for <code>'</code>.
* The character class also contains the ASCII <code>'</code> itself and
* the backtick.
* @since 1.5 */
protected final static String REGEX_QUOTE = "['`\u00b4\u02b9\u02bb\u02bc\u02bd\u02be\u02c8\u02ca\u02cb\u02f4\u0374\u0384\u055a\u055d\u05d9\u05f3\u07f4\u07f5\u144a\u16cc\u1fbd\u1fbf\u1fef\u1ffd\u1ffe\u2018\u2019\u201b\u2032\u2035\ua78c\uff07\uff40]";
/** Regular expression matching all Unicode alternatives for <code>"</code>.
* Unlike the other expressions, this character class does not contain the
* ASCII character itself.
* @since 1.5 */
protected final static String REGEX_DOUBLE_QUOTE = "[\u02ba\u02dd\u02ee\u02f6\u05f2\u05f4\u1cd3\u201c\u201d\u201f\u2033\u2036\u3003\uff02]";
/** Regular expression matching all Unicode alternatives for <code>.</code>.
* The character class also contains the ASCII <code>.</code> itself.
* @since 1.5 */
protected final static String REGEX_STOP = "[.\u0660\u06f0\u0701\u0702\u2024\ua4f8\ua60e]";
/** Regular expression matching all Unicode alternatives for <code>+</code>.
* The character class also contains the ASCII <code>+</code> itself.
* @since 1.5 */
protected final static String REGEX_PLUS = "[+\u16ed\u2795]";
/** Regular expression matching all Unicode alternatives for the space
* character. The character class also contains the ASCII space itself.
* @since 1.5 */
protected final static String REGEX_SPACE = "[ \u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f]";
/** Regular expression matching all Unicode alternatives for
* <code>&lt;</code>. The character class also contains the ASCII
* <code>&lt;</code> itself.
* @since 1.5 */
protected final static String REGEX_LESS_THAN = "[<\u02c2\u1438\u16b2\u2039\u276e]";
/** Regular expression matching all Unicode alternatives for
* <code>&gt;</code>. The character class also contains the ASCII
* <code>&gt;</code> itself.
* @since 1.5 */
protected final static String REGEX_GREATER_THAN = "[>\u02c3\u1433\u203a\u276f]";
/** Regular expression matching all Unicode alternatives for <code>=</code>.
* The character class also contains the ASCII <code>=</code> itself.
* @since 1.5 */
protected final static String REGEX_EQUAL = "[=\u1400\u2e40\u30a0\ua4ff]";
Grégory Mantelet
committed
static {
/* Fill the map of Unicode confusables:
* key = ASCII replacement character,
* value = regular expression matching the characters to replace by the key. */
REGEX_UNICODE_CONFUSABLES.put("-", REGEX_DASH);
REGEX_UNICODE_CONFUSABLES.put("_", REGEX_UNDERSCORE);
REGEX_UNICODE_CONFUSABLES.put("'", REGEX_QUOTE);
REGEX_UNICODE_CONFUSABLES.put("\u005c"", REGEX_DOUBLE_QUOTE);
REGEX_UNICODE_CONFUSABLES.put(".", REGEX_STOP);
REGEX_UNICODE_CONFUSABLES.put("+", REGEX_PLUS);
REGEX_UNICODE_CONFUSABLES.put(" ", REGEX_SPACE);
REGEX_UNICODE_CONFUSABLES.put("<", REGEX_LESS_THAN);
REGEX_UNICODE_CONFUSABLES.put(">", REGEX_GREATER_THAN);
REGEX_UNICODE_CONFUSABLES.put("=", REGEX_EQUAL);
}
/**
* Replace all Unicode characters that can be confused with other ASCI/UTF-8
* characters (e.g. different spaces, dashes, ...) in their ASCII version.
*
* @param adqlQuery The ADQL query string in which Unicode confusable
* characters must be replaced.
*
* @return The same query without the most common Unicode confusable
* characters.
*
* @since 1.5
*/
Grégory Mantelet
committed
protected String replaceUnicodeConfusables(final String adqlQuery) {
	String cleanedQuery = adqlQuery;
	// For each ASCII character, replace all its registered look-alikes:
	for(java.util.Map.Entry<String, String> entry : REGEX_UNICODE_CONFUSABLES.entrySet()) {
		final String asciiChar = entry.getKey();
		final String lookAlikesRegex = entry.getValue();
		cleanedQuery = cleanedQuery.replaceAll(lookAlikesRegex, asciiChar);
	}
	return cleanedQuery;
}
/**
* Tell whether the given token represents the end of an ADQL query.
*
* @param token Token to analyze.
*
* @return <code>true</code> if the given token represents a query end,
* <code>false</code> otherwise.
*
* @since 1.5
*/
Grégory Mantelet
committed
protected boolean isEnd(final Token token) {
	// Only the end of the input (EOF) and the EOQ token end a query.
	switch(token.kind) {
		case ADQLParserConstants.EOF:
		case ADQLParserConstants.EOQ:
			return true;
		default:
			return false;
	}
}
/**
* Tell whether the given token must be double quoted.
*
* <p>
* This function considers all the following as terms to double quote:
* </p>
* <ul>
* <li>SQL reserved keywords</li>,
* <li>unrecognised regular identifiers (e.g. neither a delimited nor a
* valid ADQL regular identifier)</li>
* <li>and ADQL function name without a parameters list.</li>
* </ul>
*
* @param token The token to analyze.
* @param nextToken The following token. (useful to detect the start of a
* function's parameters list)
*
* @return <code>true</code> if the given token must be double quoted,
* <code>false</code> to keep it as provided.
*
* @since 1.5
*/
Grégory Mantelet
committed
protected boolean mustEscape(final Token token, final Token nextToken) {
	final int kind = token.kind;

	// SQL reserved keywords are always double quoted:
	if (kind == ADQLParserConstants.SQL_RESERVED_WORD)
		return true;

	// Identifier candidates are double quoted only when not regular:
	if (kind == ADQLParserConstants.REGULAR_IDENTIFIER_CANDIDATE)
		return !isRegularIdentifier(token.image);

	// Anything else is left alone, unless it is a function name...
	if (!isFunctionName(token))
		return false;

	// ...which is not followed by the start of a parameters list:
	return nextToken == null || nextToken.kind != ADQLParserConstants.LEFT_PAR;
}
Grégory Mantelet
committed
/**
* Tell whether the given token matches to an ADQL function name.
*
* @param token The token to analyze.
*
* @return <code>true</code> if the given token is an ADQL function name,
* <code>false</code> otherwise.
*
* @since 1.5
*/
Grégory Mantelet
committed
protected boolean isFunctionName(final Token token) {
switch(token.kind) {
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
case ADQLParserConstants.COUNT:
case ADQLParserConstants.EXISTS:
case ADQLParserConstants.AVG:
case ADQLParserConstants.MAX:
case ADQLParserConstants.MIN:
case ADQLParserConstants.SUM:
case ADQLParserConstants.BOX:
case ADQLParserConstants.CENTROID:
case ADQLParserConstants.CIRCLE:
case ADQLParserConstants.POINT:
case ADQLParserConstants.POLYGON:
case ADQLParserConstants.REGION:
case ADQLParserConstants.CONTAINS:
case ADQLParserConstants.INTERSECTS:
case ADQLParserConstants.AREA:
case ADQLParserConstants.COORD1:
case ADQLParserConstants.COORD2:
case ADQLParserConstants.COORDSYS:
case ADQLParserConstants.DISTANCE:
case ADQLParserConstants.ABS:
case ADQLParserConstants.CEILING:
case ADQLParserConstants.DEGREES:
case ADQLParserConstants.EXP:
case ADQLParserConstants.FLOOR:
case ADQLParserConstants.LOG:
case ADQLParserConstants.LOG10:
case ADQLParserConstants.MOD:
case ADQLParserConstants.PI:
case ADQLParserConstants.POWER:
case ADQLParserConstants.RADIANS:
case ADQLParserConstants.RAND:
case ADQLParserConstants.ROUND:
case ADQLParserConstants.SQRT:
case ADQLParserConstants.TRUNCATE:
case ADQLParserConstants.ACOS:
case ADQLParserConstants.ASIN:
case ADQLParserConstants.ATAN:
case ADQLParserConstants.ATAN2:
case ADQLParserConstants.COS:
case ADQLParserConstants.COT:
case ADQLParserConstants.SIN:
case ADQLParserConstants.TAN:
case ADQLParserConstants.USING:
return true;
default:
return false;
}
}
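/**
* Reads the specified ADQL query (from a file, a URL or the standard input)
* and parses it. Depending on the given options, the understood ADQL query
* or its SQL translation is then printed if the syntax is correct.
*
* <p>
* <b>ONLY the syntax is checked: the query is NOT EXECUTED !</b>
* </p>
*
* @param args The command line parameters (see the usage message
* displayed with <code>-h</code> or <code>--help</code>).
* @throws Exception If any unexpected error occurs.
*/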
public static final void main(String[] args) throws Exception {
final String USAGE = "Usage:\u005cn adqlParser.jar [-d] [-v] [-e] [-a|-s] [-f] [<FILE>|<URL>]\u005cn\u005cnNOTE: If no file or URL is given, the ADQL query is expected in the standard\u005cn input. This query must end with a ';' or <Ctrl+D>!\u005cn\u005cnParameters:\u005cn -v or --verbose : Print the main steps of the parsing\u005cn -d or --debug : Print stack traces when a grave error occurs\u005cn -e or --explain : Explain the ADQL parsing (or Expand the parsing tree)\u005cn -a or --adql : Display the understood ADQL query\u005cn -s or --sql : Ask the SQL translation of the given ADQL query\u005cn (SQL compatible with PostgreSQL)\u005cn -f or --try-fix : Try fixing the most common ADQL query issues before\u005cn attempting to parse the query.\u005cn\u005cnReturn:\u005cn By default: nothing if the query is correct. Otherwise a message explaining\u005cn why the query is not correct is displayed.\u005cn With the -s option, the SQL translation of the given ADQL query will be\u005cn returned.\u005cn With the -a option, the ADQL query is returned as it has been understood.\u005cn\u005cnExit status:\u005cn 0 OK !\u005cn 1 Parameter error (missing or incorrect parameter)\u005cn 2 File error (incorrect file/url, reading error, ...)\u005cn 3 Parsing error (syntactic or semantic error)\u005cn 4 Translation error (a problem has occurred during the translation of the\u005cn given ADQL query in SQL).";
ADQLParser parser;
final String urlRegex = "^(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]";
String file = null, metaFile = null;
short mode = -1;
boolean verbose = false, debug = false, explain = false, tryFix = false;
// Parameters reading:
Grégory Mantelet
committed
for(int i = 0; i < args.length; i++) {
if (args[i].equalsIgnoreCase("-d") || args[i].equalsIgnoreCase("--debug"))
debug = true;
else if (args[i].equalsIgnoreCase("-v") || args[i].equalsIgnoreCase("--verbose"))
verbose = true;
else if (args[i].equalsIgnoreCase("-e") || args[i].equalsIgnoreCase("--explain"))
explain = true;
Grégory Mantelet
committed
else if (args[i].equalsIgnoreCase("-a") || args[i].equalsIgnoreCase("--adql")) {
if (mode != -1) {
System.err.println("((!)) Too much parameter: you must choose between -s, -c, -a or nothing ((!))\u005cn" + USAGE);
System.exit(1);
Grégory Mantelet
committed
} else
Grégory Mantelet
committed
} else if (args[i].equalsIgnoreCase("-s") || args[i].equalsIgnoreCase("--sql")) {
if (mode != -1) {
System.err.println("((!)) Too much parameter: you must choose between -s, -c, -a or nothing ((!))\u005cn" + USAGE);
System.exit(1);
Grégory Mantelet
committed
} else
Grégory Mantelet
committed
} else if (args[i].equalsIgnoreCase("-f") || args[i].equalsIgnoreCase("--try-fix"))
Grégory Mantelet
committed
else if (args[i].equalsIgnoreCase("-h") || args[i].equalsIgnoreCase("--help")) {
System.out.println(USAGE);
System.exit(0);
Grégory Mantelet
committed
} else if (args[i].startsWith("-")) {
System.err.println("((!)) Unknown parameter: \u005c"" + args[i] + "\u005c" ((!))\u005cn" + USAGE);
System.exit(1);
Grégory Mantelet
committed
} else
file = args[i].trim();
}
Grégory Mantelet
committed
try {
// Try fixing the query, if asked:
Grégory Mantelet
committed
if (tryFix) {
if (verbose)
System.out.println("((i)) Trying to automatically fix the query...");
String query;
java.io.InputStream in = null;
Grégory Mantelet
committed
try {
// get the input stream...
if (file == null || file.length() == 0)
in = System.in;
else if (file.matches(urlRegex))
in = (new java.net.URL(file)).openStream();
else
in = new java.io.FileInputStream(file);
// ...and try fixing the query:
query = (new ADQLParser()).tryQuickFix(in);
Grégory Mantelet
committed
} finally {
// close the stream (if opened):
if (in != null)
in.close();
in = null;
}
if (verbose)
System.out.println("((i)) SUGGESTED QUERY:\u005cn" + query);
// Initialise the parser with this fixed query:
parser = new ADQLParser(new java.io.ByteArrayInputStream(query.getBytes()));
}
// Otherwise, take the query as provided:
Grégory Mantelet
committed
else {
// Initialise the parser with the specified input:
if (file == null || file.length() == 0)
parser = new ADQLParser(System.in);
else if (file.matches(urlRegex))
parser = new ADQLParser((new java.net.URL(file)).openStream());
else
parser = new ADQLParser(new java.io.FileInputStream(file));
}
// Enable/Disable the debugging in function of the parameters:
parser.setDebug(explain);
// Query parsing:
Grégory Mantelet
committed
try {
if (verbose)
System.out.print("((i)) Parsing ADQL query...");
ADQLQuery q = parser.parseQuery();
if (verbose)
System.out.println("((i)) CORRECT ADQL QUERY ((i))");
Grégory Mantelet
committed
if (mode == 2) {
PostgreSQLTranslator translator = new PostgreSQLTranslator();
if (verbose)
System.out.print("((i)) Translating in SQL...");
String sql = translator.translate(q);
if (verbose)
System.out.println("ok");
System.out.println(sql);
Grégory Mantelet
committed
} else if (mode == 1) {
System.out.println(q.toADQL());
}
Grégory Mantelet
committed
} catch(UnresolvedIdentifiersException uie) {
System.err.println("((X)) " + uie.getNbErrors() + " unresolved identifiers:");
for(ParseException pe : uie)
System.err.println("\u005ct - at " + pe.getPosition() + ": " + uie.getMessage());
if (debug)
uie.printStackTrace(System.err);
System.exit(3);
Grégory Mantelet
committed
} catch(ParseException pe) {
System.err.println("((X)) Syntax error: " + pe.getMessage() + " ((X))");
if (debug)
pe.printStackTrace(System.err);
System.exit(3);
Grégory Mantelet
committed
} catch(TranslationException te) {
if (verbose)
System.out.println("error");
System.err.println("((X)) Translation error: " + te.getMessage() + " ((X))");
if (debug)
te.printStackTrace(System.err);
System.exit(4);
}
Grégory Mantelet
committed
} catch(IOException ioe) {
System.err.println("\u005cn((X)) Error while reading the file \u005c"" + file + "\u005c": " + ioe.getMessage() + " ((X))");
if (debug)
ioe.printStackTrace(System.err);
System.exit(2);
}
}
/* ########## */
/* # SYNTAX # */
/* ########## */
/* ******************* */
/* GENERAL ADQL SYNTAX */