Commit 62d0b750 authored by Sonia Zorba's avatar Sonia Zorba
Browse files

Avoided OutOfMemoryError in MD5 calculation of big files

parent 9bdc82ff
Loading
Loading
Loading
Loading
Loading
+29 −6
Original line number Diff line number Diff line
@@ -12,7 +12,10 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Objects;
@@ -126,14 +129,14 @@ public class PutFileService {
                LOG.warn("Destination file {} size mismatch with source", destinationFile.toPath().toString());
            }

            if (sourceFileInfo.getContentType() != null && 
                    !sourceFileInfo.getContentType().equals(destinationFileInfo.getContentType())) {
            if (sourceFileInfo.getContentType() != null
                    && !sourceFileInfo.getContentType().equals(destinationFileInfo.getContentType())) {
                LOG.warn("Destination file {} content type mismatch with source {} {}", destinationFile.toPath().toString(),
                        destinationFileInfo.getContentType(), sourceFileInfo.getContentType());
            }

            if (sourceFileInfo.getContentMd5() != null && 
                    !sourceFileInfo.getContentMd5().equals(md5Checksum)) {
            if (sourceFileInfo.getContentMd5() != null
                    && !sourceFileInfo.getContentMd5().equals(md5Checksum)) {
                LOG.warn("Destination file {} md5 mismatch with source {} {}", destinationFile.toPath().toString(),
                        destinationFileInfo.getContentMd5(), sourceFileInfo.getContentMd5());
            }
@@ -189,7 +192,27 @@ public class PutFileService {

    private String makeMD5Checksum(File file) throws NoSuchAlgorithmException, IOException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        md.update(Files.readAllBytes(file.toPath()));

        // We can't update the MessageDigest object in a single step using
        // Files.readAllBytes because we also want to handle big files, and that
        // method loads the whole content into memory (an OutOfMemoryError has
        // been observed for files bigger than 1GB). Instead, the digest is
        // updated incrementally using a FileChannel and a ByteBuffer.
        try (FileChannel channel = FileChannel.open(file.toPath(), StandardOpenOption.READ)) {
            // Allocate the buffer; a new buffer starts in write mode, so the FileChannel can fill it
            ByteBuffer buffer = ByteBuffer.allocate(1024);
            int bytesRead;
            while ((bytesRead = channel.read(buffer)) != -1) {
                if (bytesRead > 0) {
                    // switch from write to read mode
                    buffer.flip();
                    md.update(buffer);
                }
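                // rewind resets the position to 0 so the buffer can be filled again; it keeps the
                // limit set by flip(). NOTE(review): clear() would also restore the full capacity.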
                buffer.rewind();
            }
        }

        byte[] digest = md.digest();
        String checksum = DatatypeConverter.printHexBinary(digest);
        return checksum;
+14 −10
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
import org.mockito.Mockito;
import static org.mockito.Mockito.mock;
@@ -92,6 +93,9 @@ public class PutFileControllerTest {

        assertTrue(file.exists());
        assertEquals("content", Files.contentOf(file, StandardCharsets.UTF_8));

        verify(fileDao, times(1)).updateFileAttributes(anyInt(), eq("text/plain"), any(), eq(7l), eq("9A0364B9E99BB480DD25E1F0284C8555"));

        assertTrue(file.delete());
    }