Commit 3e65794d authored by Grégory Mantelet's avatar Grégory Mantelet
Browse files

[TAP] Fix memory issues for the 'text/plain' format.

The previous text formatting process was storing the entire table in memory....
hence OutOfMemoryError when dealing with large table.

Now, this process is done entirely in memory only for a table having less than
1000 lines. For a larger table, its content is stored in a temporary file. This
file is deleted after usage or in case of error.

This formatting process has been tested under JVM monitoring (both JConsole and
VisualVM) and tables larger than 3,000,000 rows, with success.

This commit fixes the GitHub issue #40
parent f912c20a
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@
	<property name="adqlJunitReportsDir" value="reports/adql" />
	<property name="uwsJunitReportsDir" value="reports/uws" />
	<property name="tapJunitReportsDir" value="reports/tap" />
	<property name="cdsJunitReportsDir" value="reports/cds" />
	
	<fail message="The property POSTGRES must be set! It provides the path toward a directory or a JAR which contains all classes inside org.postgresql.">
		<condition><not><isset property="POSTGRES"/></not></condition>
@@ -114,12 +115,14 @@
		<delete dir="${adqlJunitReportsDir}" failonerror="false" />
		<delete dir="${uwsJunitReportsDir}" failonerror="false" />
		<delete dir="${tapJunitReportsDir}" failonerror="false" />
		<delete dir="${cdsJunitReportsDir}" failonerror="false" />
	</target>
	
	<target name="junitValidation" depends="cleanJUnitReports,compileJUnit" description="Executes all JUnit tests before building the library and stop ANT at any error.">
		<mkdir dir="${adqlJunitReportsDir}"/>
		<mkdir dir="${uwsJunitReportsDir}"/>
		<mkdir dir="${tapJunitReportsDir}"/>
		<mkdir dir="${cdsJunitReportsDir}"/>
		<junit fork="true" errorproperty="testsFailure" failureproperty="testsFailure">
			<classpath refid="junit.class.path" />
			<classpath>
@@ -136,6 +139,9 @@
			<batchtest todir="${tapJunitReportsDir}">
				<fileset dir="${testsDir}" includes="tap/**/Test*.java" />
			</batchtest>
			<batchtest todir="${cdsJunitReportsDir}">
				<fileset dir="${testsDir}" includes="cds/**/Test*.java" />
			</batchtest>
		</junit>
		<delete dir="${junitBuildDir}" failonerror="false" />
		<fail if="${testsFailure}" message="Failed JUnit validation for ADQL, UWS or TAP Lib.!" />
+19 −2
Original line number Diff line number Diff line
@@ -35,7 +35,12 @@ import java.util.ArrayList;
 * @version 1.2 Jun 2008 MW Add a toString method (items aligned) ;
 *                          Fix a bug in align() when the last line is not full
 * @version 1.3 Nov 2018 GM Make the alignment process - align() - interruptible
 *
 * @deprecated	This class suffers a lot of memory issues when dealing with
 *            	large tables. {@link LargeAsciiTable} should be
 *            	used instead.
 */
@Deprecated
public class AsciiTable {
	public static final int LEFT = 0;
	public static final int CENTER = 1;
@@ -178,7 +183,11 @@ public class AsciiTable {
	 *                             	<em>This interruption is useful when this
	 *                             	alignment operation becomes time and memory
	 *                             	consuming.</em>
	 *
	 * @deprecated	Deprecated because of memory issues. Instead, you should use
	 *            	{@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[])}.
	 */
	@Deprecated
	public String[] displayAligned(int[] pos) throws InterruptedException{
		return align(pos, '\0', null);
	}
@@ -203,7 +212,11 @@ public class AsciiTable {
	 *                             	<em>This interruption is useful when this
	 *                             	alignment operation becomes time and memory
	 *                             	consuming.</em>
	 *
	 * @deprecated	Deprecated because of memory issues. Instead, you should use
	 *            	{@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[], char)}.
	 */
	@Deprecated
	public String[] displayAligned(int[] pos, char newsep) throws InterruptedException{
		return displayAligned(pos, newsep, null);
	}
@@ -213,8 +226,8 @@ public class AsciiTable {
	 *
	 * <p><strong>IMPORTANT:</strong>
	 * 	The array must have as many columns as the table has. Each column can
	 * 	contain either {@link AsciiTable#LEFT}, {@link AsciiTable#CENTER} or
	 * 	{@link AsciiTable#RIGHT}. If the array contains ONE item, it will be
	 * 	contain either {@link #LEFT}, {@link #CENTER} or
	 * 	{@link #RIGHT}. If the array contains ONE item, it will be
	 * 	used for every column.
	 * </p>
	 *
@@ -230,7 +243,11 @@ public class AsciiTable {
	 *                             	<em>This interruption is useful when this
	 *                             	alignment operation becomes time and memory
	 *                             	consuming.</em>
	 *
	 * @deprecated	Deprecated because of memory issues. Instead, you should use
	 *            	{@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[], char, Thread)}.
	 */
	@Deprecated
	public String[] displayAligned(int[] pos, char newsep, final Thread thread) throws InterruptedException{
		if (newsep == csep)
			newsep = '\0';
+1470 −0

File added.

Preview size limit exceeded, changes collapsed.

+116 −44
Original line number Diff line number Diff line
@@ -26,15 +26,18 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter;

import adql.db.DBColumn;
import cds.util.AsciiTable;
import cds.util.LargeAsciiTable;
import cds.util.LargeAsciiTable.LineProcessor;
import cds.util.LargeAsciiTable.LineProcessorException;
import tap.ServiceConnection;
import tap.TAPException;
import tap.TAPExecutionReport;
import tap.data.TableIterator;

/**
 * Format any given query (table) result into a simple table ASCII representation
 * (columns' width are adjusted so that all columns are well aligned and of the same width).
 * Format any given query (table) result into a simple table ASCII
 * representation (columns' width are adjusted so that all columns are well
 * aligned and of the same width).
 *
 * @author Gr&eacute;gory Mantelet (CDS;ARI)
 * @version 2.3 (11/2018)
@@ -46,21 +49,50 @@ public class TextFormat implements OutputFormat {
	 * @since 2.0 */
	protected static final char COL_SEP = '\u25c6';

	/** The {@link ServiceConnection} to use (for the log and to have some information about the service (particularly: name, description). */
	/** How all columns must be aligned.
	 * @see LargeAsciiTable#streamAligned(LineProcessor, int[])
	 * @since 2.3 */
	protected int[] alignment = new int[]{ LargeAsciiTable.LEFT };

	/** The {@link ServiceConnection} to use (for the log and to have some
	 * information about the service (particularly: name, description). */
	protected final ServiceConnection service;

	/**
	 * Build a {@link TextFormat}.
	 *
	 * <p><em><strong>Note:</strong>
	 * 	All columns values will be aligned on the left.
	 * 	To change this default alignment, use
	 * 	{@link #TextFormat(ServiceConnection, int[])} instead.
	 * </em></p>
	 *
	 * @param service	Description of the TAP service.
	 *
	 * @throws NullPointerException	If the given service connection is <code>null</code>.
	 * @throws NullPointerException	If the given service connection is NULL.
	 */
	public TextFormat(final ServiceConnection service) throws NullPointerException{
		this(service, new int[]{ LargeAsciiTable.LEFT });
	}

	/**
	 * Build a {@link TextFormat}.
	 *
	 * @param service			Description of the TAP service.
	 * @param customAlignment	How columns must be aligned.
	 *                       	<em>(see {@link LargeAsciiTable#streamAligned(LineProcessor, int[])}
	 *                       	to know the rules about this array)</em>
	 *
	 * @throws NullPointerException	If the given service connection is NULL.
	 *
	 * @since 2.3
	 */
	public TextFormat(final ServiceConnection service, final int[] customAlignment) throws NullPointerException{
		if (service == null)
			throw new NullPointerException("The given service connection is NULL!");

		this.service = service;
		this.alignment = customAlignment;
	}

	@Override
@@ -86,12 +118,11 @@ public class TextFormat implements OutputFormat {
	@Override
	public void writeResult(TableIterator result, OutputStream output, TAPExecutionReport execReport, Thread thread) throws TAPException, IOException, InterruptedException{
		// Prepare the formatting of the whole output:
		AsciiTable asciiTable = new AsciiTable(COL_SEP);
		try(LargeAsciiTable asciiTable = new LargeAsciiTable(COL_SEP)){

			// Write header:
			String headerLine = getHeader(result, execReport, thread);
			asciiTable.addHeaderLine(headerLine);
		asciiTable.endHeaderLine();

			if (thread.isInterrupted())
				throw new InterruptedException();
@@ -101,21 +132,10 @@ public class TextFormat implements OutputFormat {

			// Finally write the formatted ASCII table (header + data) in the output stream:
			BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output));
		String[] lines = asciiTable.displayAligned(new int[]{ AsciiTable.LEFT }, '|', thread);
			execReport.nbRows = 0;
		for(String l : lines){
			// stop right now the formatting if the job has been aborted/cancelled/interrupted:
			if (thread.isInterrupted())
				throw new InterruptedException();
			// write the line:
			writer.write(l);
			writer.newLine();
			// update the counter of written lines:
			execReport.nbRows++;
			// flush the writer every 30 lines:
			if (execReport.nbRows % 30 == 0)
				writer.flush();
		}
			try{
				// Align and write the result:
				execReport.nbRows = asciiTable.streamAligned(new LineFormatter(writer), alignment, '|', thread);

				// Add a line in case of an OVERFLOW:
				if (overflow){
@@ -124,14 +144,57 @@ public class TextFormat implements OutputFormat {
				}

				writer.flush();
			}catch(LineProcessorException lpe){
				throw new TAPException("Unexpected error while formatting a result line!", lpe);
			}
		}
	}

	/**
	 * Lets format a line and then write it in the given {@link BufferedWriter}.
	 *
	 * @author Gr&eacute;gory Mantelet (CDS)
	 * @version 2.3 (11/2018)
	 * @since 2.3
	 */
	protected static class LineFormatter implements LineProcessor {
		private final BufferedWriter writer;
		private long nbFormattedLines = 0;

		public LineFormatter(final BufferedWriter writer){
			this.writer = writer;
		}

		@Override
		public boolean process(String line) throws LineProcessorException{
			try{
				// write the line:
				writer.write(line);
				writer.newLine();
				// update the counter of written lines:
				//execReport.nbRows++;
				nbFormattedLines++;
				// flush the writer every 30 lines:
				if (nbFormattedLines % 30 == 0)
					writer.flush();
				return true;
			}catch(IOException ioe){
				throw new LineProcessorException("Impossible to write the given result line!", ioe);
			}
		}
	}

	/**
	 * Get the whole header (one row whose columns are just the columns' name).
	 *
	 * @param result		Result to write later (but it contains also metadata that was extracted from the result itself).
	 * @param execReport	Execution report (which contains the metadata extracted/guessed from the ADQL query).
	 * @param thread		Thread which has asked for this formatting (it must be used in order to test the {@link Thread#isInterrupted()} flag and so interrupt everything if need).
	 * @param result		Result to write later (but it contains also metadata
	 *              		that was extracted from the result itself).
	 * @param execReport	Execution report (which contains the metadata
	 *                  	extracted/guessed from the ADQL query).
	 * @param thread		Thread which has asked for this formatting (it must
	 *              		be used in order to test the
	 *              		{@link Thread#isInterrupted()} flag and so interrupt
	 *              		everything if need).
	 *
	 * @return	All the written metadata.
	 *
@@ -158,19 +221,25 @@ public class TextFormat implements OutputFormat {
	}

	/**
	 * Write all the data rows into the given {@link AsciiTable} object.
	 * Write all the data rows into the given {@link LargeAsciiTable} object.
	 *
	 * @param queryResult	Result to write.
	 * @param asciiTable		Output in which the rows (as string) must be written.
	 * @param execReport		Execution report (which contains the maximum allowed number of records to output).
	 * @param thread			Thread which has asked for this formatting (it must be used in order to test the {@link Thread#isInterrupted()} flag and so interrupt everything if need).
	 * @param asciiTable	Output in which the rows (as string) must be
	 *                  	written.
	 * @param execReport	Execution report (which contains the maximum allowed
	 *                  	number of records to output).
	 * @param thread		Thread which has asked for this formatting (it must
	 *              		be used in order to test the
	 *              		{@link Thread#isInterrupted()} flag and so interrupt
	 *              		everything if need).
	 *
	 * @return	<i>true</i> if an overflow (i.e. nbDBRows > MAXREC) is detected, <i>false</i> otherwise.
	 * @return	<i>true</i> if an overflow (i.e. nbDBRows > MAXREC) is detected,
	 *        	<i>false</i> otherwise.
	 *
	 * @throws InterruptedException		If the thread has been interrupted.
	 * @throws TAPException				If any other error occurs.
	 */
	protected boolean writeData(final TableIterator queryResult, final AsciiTable asciiTable, final TAPExecutionReport execReport, final Thread thread) throws TAPException, InterruptedException{
	protected boolean writeData(final TableIterator queryResult, final LargeAsciiTable asciiTable, final TAPExecutionReport execReport, final Thread thread) throws IOException, TAPException, InterruptedException{
		execReport.nbRows = 0;
		boolean overflow = false;

@@ -210,6 +279,9 @@ public class TextFormat implements OutputFormat {
			execReport.nbRows++;
		}

		// Declare the table as complete:
		asciiTable.endTable();

		return overflow;
	}

+865 −0

File added.

Preview size limit exceeded, changes collapsed.