Introduce email migration tool
author Mathieu Baudier <mbaudier@argeo.org>
Sun, 1 May 2022 06:55:07 +0000 (08:55 +0200)
committer Mathieu Baudier <mbaudier@argeo.org>
Sun, 1 May 2022 06:55:07 +0000 (08:55 +0200)
org.argeo.slc.mail/.classpath [new file with mode: 0644]
org.argeo.slc.mail/.gitignore [new file with mode: 0644]
org.argeo.slc.mail/.project [new file with mode: 0644]
org.argeo.slc.mail/META-INF/MANIFEST.MF [new file with mode: 0644]
org.argeo.slc.mail/build.properties [new file with mode: 0644]
org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java [new file with mode: 0644]
org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java [new file with mode: 0644]

diff --git a/org.argeo.slc.mail/.classpath b/org.argeo.slc.mail/.classpath
new file mode 100644 (file)
index 0000000..81fe078
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+       <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-17"/>
+       <classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
+       <classpathentry kind="src" path="src"/>
+       <classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/org.argeo.slc.mail/.gitignore b/org.argeo.slc.mail/.gitignore
new file mode 100644 (file)
index 0000000..27a9162
--- /dev/null
@@ -0,0 +1 @@
+!**/MANIFEST.MF
\ No newline at end of file
diff --git a/org.argeo.slc.mail/.project b/org.argeo.slc.mail/.project
new file mode 100644 (file)
index 0000000..75b09ab
--- /dev/null
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+       <name>org.argeo.slc.mail</name>
+       <comment></comment>
+       <projects>
+       </projects>
+       <buildSpec>
+               <buildCommand>
+                       <name>org.eclipse.jdt.core.javabuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+               <buildCommand>
+                       <name>org.eclipse.pde.ManifestBuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+               <buildCommand>
+                       <name>org.eclipse.pde.SchemaBuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+       </buildSpec>
+       <natures>
+               <nature>org.eclipse.pde.PluginNature</nature>
+               <nature>org.eclipse.jdt.core.javanature</nature>
+       </natures>
+</projectDescription>
diff --git a/org.argeo.slc.mail/META-INF/MANIFEST.MF b/org.argeo.slc.mail/META-INF/MANIFEST.MF
new file mode 100644 (file)
index 0000000..6cb81f2
--- /dev/null
@@ -0,0 +1,15 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: Mail
+Bundle-SymbolicName: org.argeo.slc.mail
+Bundle-Version: 1.0.0.qualifier
+Automatic-Module-Name: org.argeo.slc.mail
+Bundle-RequiredExecutionEnvironment: JavaSE-17
+Import-Package: com.sun.mail.imap;version="1.6.2",
+ com.sun.mail.mbox;version="1.6.7",
+ javax.activation,
+ javax.mail;version="1.6.0",
+ javax.mail.event;version="1.6.0",
+ javax.mail.internet;version="1.6.0",
+ javax.mail.search;version="1.6.0",
+ javax.mail.util;version="1.6.0"
diff --git a/org.argeo.slc.mail/build.properties b/org.argeo.slc.mail/build.properties
new file mode 100644 (file)
index 0000000..34d2e4d
--- /dev/null
@@ -0,0 +1,4 @@
+source.. = src/
+output.. = bin/
+bin.includes = META-INF/,\
+               .
diff --git a/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java
new file mode 100644 (file)
index 0000000..378d50c
--- /dev/null
@@ -0,0 +1,362 @@
+package org.argeo.slc.mail;
+
+import static java.lang.System.Logger.Level.DEBUG;
+import static java.lang.System.Logger.Level.ERROR;
+import static org.argeo.slc.mail.EmailUtils.describe;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.System.Logger;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.time.Instant;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Properties;
+
+import javax.mail.FetchProfile;
+import javax.mail.Folder;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.Multipart;
+import javax.mail.Session;
+import javax.mail.Store;
+import javax.mail.URLName;
+import javax.mail.internet.InternetHeaders;
+import javax.mail.internet.MimeBodyPart;
+import javax.mail.internet.MimeMessage;
+import javax.mail.search.HeaderTerm;
+import javax.mail.util.SharedFileInputStream;
+
+import com.sun.mail.imap.IMAPFolder;
+import com.sun.mail.mbox.MboxFolder;
+import com.sun.mail.mbox.MboxMessage;
+
+/** Migrates emails from one storage to another. */
+public class EmailMigration {
+       private final static Logger logger = System.getLogger(EmailMigration.class.getName());
+
+       private String targetBaseDir;
+       private String sourceServer;
+       private String sourceUsername;
+       private String sourcePassword;
+
+       public void process() throws MessagingException, IOException {
+               Path baseDir = Paths.get(targetBaseDir).resolve(sourceUsername);
+
+               Store sourceStore = null;
+               try {
+                       Properties sourceProperties = System.getProperties();
+                       sourceProperties.setProperty("mail.store.protocol", "imaps");
+
+                       Session sourceSession = Session.getDefaultInstance(sourceProperties, null);
+                       // session.setDebug(true);
+                       sourceStore = sourceSession.getStore("imaps");
+                       sourceStore.connect(sourceServer, sourceUsername, sourcePassword);
+
+                       // Always start with Inbox
+                       Folder inboxFolder = sourceStore.getFolder(EmailUtils.INBOX);
+
+                       migrateFolder(baseDir, inboxFolder);
+               } finally {
+                       if (sourceStore != null)
+                               sourceStore.close();
+
+               }
+       }
+
+       /**
+        * Opens the source folder read-only, migrates its messages and closes it
+        * again, together with the target folder if one was created.
+        * 
+        * @param baseDir      the directory below which the messages are written
+        * @param sourceFolder the folder to migrate; it is opened by this method
+        */
+       protected void migrateFolder(Path baseDir, Folder sourceFolder) throws MessagingException, IOException {
+
+               String folderName = sourceFolder.getName();
+               sourceFolder.open(Folder.READ_ONLY);
+
+               Folder targetFolder = null;
+               try {
+                       int messageCount = sourceFolder.getMessageCount();
+                       logger.log(DEBUG, folderName + " - Message count : " + messageCount);
+
+                       // hard-coded switch between the two output formats; only mbox is active
+                       boolean saveAsFiles = false;
+
+                       if (saveAsFiles) {
+                               Message messages[] = sourceFolder.getMessages();
+
+                               for (int i = 0; i < messages.length; ++i) {
+                                       Message msg = messages[i];
+                                       String subject = msg.getSubject();
+                                       Instant sentDate = msg.getSentDate().toInstant();
+                                       String fileName = sentDate + "  " + subject;
+                                       Path file = baseDir.resolve(fileName);
+                                       savePartsAsFiles(msg.getContent(), file);
+                               }
+                       } else {
+                               long begin = System.currentTimeMillis();
+                               targetFolder = migrateFolderToMbox(baseDir, sourceFolder);
+                               long duration = System.currentTimeMillis() - begin;
+                               // an empty folder must not fail on a division by zero
+                               long perMessage = messageCount > 0 ? duration / messageCount : 0;
+                               logger.log(DEBUG, folderName + " - Migration of " + messageCount + " messages took " + (duration / 1000)
+                                               + " s (" + perMessage + " ms per message)");
+                       }
+               } finally {
+                       // make sure the target folder is closed even if closing the source fails
+                       try {
+                               sourceFolder.close();
+                       } finally {
+                               if (targetFolder != null)
+                                       targetFolder.close();
+                       }
+               }
+       }
+
+       /**
+        * Appends the messages of the source folder to an mbox file named after the
+        * folder, located in the <code>mbox</code> sub-directory of the base
+        * directory. If the mbox file already contains messages, its last message is
+        * looked up in the source folder by Message-ID and only the source messages
+        * with a higher message number are retrieved. Messages are processed in
+        * batches of 100; both folders are closed and reopened after each batch in
+        * order to free the memory held by fetched messages.
+        * 
+        * @param baseDir      the directory below which the <code>mbox</code>
+        *                     directory is created
+        * @param sourceFolder the folder to read from, expected to be open
+        * @return the target mbox folder, left open
+        * @throws IllegalStateException if the last target message matches no source
+        *                               message, or more than one
+        */
+       protected Folder migrateFolderToMbox(Path baseDir, Folder sourceFolder) throws MessagingException, IOException {
+               String folderName = sourceFolder.getName();
+
+               Path targetDir = baseDir.resolve("mbox");
+               Files.createDirectories(targetDir);
+               Path targetPath = targetDir.resolve(folderName);
+               if (!Files.exists(targetPath))
+                       Files.createFile(targetPath);
+               URLName targetUrlName = new URLName("mbox:" + targetPath.toString());
+               Properties targetProperties = new Properties();
+               Session targetSession = Session.getDefaultInstance(targetProperties);
+               Folder targetFolder = targetSession.getFolder(targetUrlName);
+               targetFolder.open(Folder.READ_WRITE);
+
+               // resume after the last message already present in the target, if any
+               int lastSourceNumber;
+               int currentTargetMessageCount = targetFolder.getMessageCount();
+               if (currentTargetMessageCount != 0) {
+                       MimeMessage lastTargetMessage = (MimeMessage) targetFolder.getMessage(currentTargetMessageCount);
+                       logger.log(DEBUG, "Last target message " + describe(lastTargetMessage));
+                       String lastTargetMessageId = lastTargetMessage.getMessageID();
+                       Message[] lastSourceMessage = sourceFolder
+                                       .search(new HeaderTerm(EmailUtils.MESSAGE_ID, lastTargetMessageId));
+                       if (lastSourceMessage.length == 0)
+                               throw new IllegalStateException("No message found with message ID " + lastTargetMessageId);
+                       if (lastSourceMessage.length != 1) {
+                               for (Message msg : lastSourceMessage) {
+                                       logger.log(ERROR, "Message " + describe(msg));
+                               }
+                               // the search was done by message ID, so report that criterion
+                               throw new IllegalStateException(
+                                               lastSourceMessage.length + " messages found with message ID " + lastTargetMessageId);
+                       }
+                       lastSourceNumber = lastSourceMessage[0].getMessageNumber();
+               } else {
+                       lastSourceNumber = 0;
+               }
+               logger.log(DEBUG, "Last source message number " + lastSourceNumber);
+
+               int countToRetrieve = sourceFolder.getMessageCount() - lastSourceNumber;
+
+               FetchProfile fetchProfile = new FetchProfile();
+               fetchProfile.add(FetchProfile.Item.FLAGS);
+               fetchProfile.add(FetchProfile.Item.ENVELOPE);
+               fetchProfile.add(FetchProfile.Item.CONTENT_INFO);
+               fetchProfile.add(FetchProfile.Item.SIZE);
+               if (sourceFolder instanceof IMAPFolder) {
+                       fetchProfile.add(IMAPFolder.FetchProfileItem.HEADERS);
+                       fetchProfile.add(IMAPFolder.FetchProfileItem.MESSAGE);
+               }
+
+               int batchSize = 100;
+               int batchCount = countToRetrieve / batchSize;
+               if (countToRetrieve % batchSize != 0)
+                       batchCount = batchCount + 1;
+               for (int i = 0; i < batchCount; i++) {
+                       long begin = System.currentTimeMillis();
+
+                       // message numbers are 1-based and the range is inclusive
+                       int start = lastSourceNumber + i * batchSize + 1;
+                       int end = lastSourceNumber + (i + 1) * batchSize;
+                       if (end >= (lastSourceNumber + countToRetrieve + 1))
+                               end = lastSourceNumber + countToRetrieve;
+                       Message[] sourceMessages = sourceFolder.getMessages(start, end);
+                       sourceFolder.fetch(sourceMessages, fetchProfile);
+
+                       Message[] targetMessages = new Message[sourceMessages.length];
+                       for (int j = 0; j < sourceMessages.length; j++) {
+                               MimeMessage sourceMm = (MimeMessage) sourceMessages[j];
+                               InternetHeaders ih = new InternetHeaders();
+                               for (Enumeration<String> e = sourceMm.getAllHeaderLines(); e.hasMoreElements();) {
+                                       ih.addHeaderLine(e.nextElement());
+                               }
+
+                               Path tmpFileSource = Files.createTempFile("argeo-mbox-source", ".txt");
+                               Path tmpFileTarget = Files.createTempFile("argeo-mbox-target", ".txt");
+                               Files.copy(sourceMm.getRawInputStream(), tmpFileSource, StandardCopyOption.REPLACE_EXISTING);
+
+                               // we use ISO_8859_1 because it is more robust than US_ASCII with regard to
+                               // missing characters
+                               try (BufferedReader reader = Files.newBufferedReader(tmpFileSource, StandardCharsets.ISO_8859_1);
+                                               BufferedWriter writer = Files.newBufferedWriter(tmpFileTarget, StandardCharsets.ISO_8859_1);) {
+                                       int lineNumber = 0;
+                                       String line = null;
+                                       try {
+                                               while ((line = reader.readLine()) != null) {
+                                                       lineNumber++;
+                                                       // body lines starting with "From " are prefixed with '>'
+                                                       if (line.startsWith("From ")) {
+                                                               writer.write(">" + line);
+                                                               logger.log(DEBUG, "Fix line " + lineNumber + " in " + EmailUtils.describe(sourceMm)
+                                                                               + ": " + line);
+                                                       } else {
+                                                               writer.write(line);
+                                                       }
+                                                       writer.newLine();
+                                               }
+                                       } catch (IOException e) {
+                                               logger.log(ERROR, "Error around line " + lineNumber + " of " + tmpFileSource);
+                                               throw e;
+                                       }
+                               }
+
+                               MboxMessage mboxMessage = new MboxMessage((MboxFolder) targetFolder, ih,
+                                               new SharedFileInputStream(tmpFileTarget.toFile()), sourceMm.getMessageNumber(),
+                                               EmailUtils.getUnixFrom(sourceMm), true);
+                               targetMessages[j] = mboxMessage;
+
+                               // clean up
+                               // NOTE(review): the target temp file is deleted while the stream given to
+                               // the mbox message is still open - confirm this is fine on all platforms
+                               Files.delete(tmpFileSource);
+                               Files.delete(tmpFileTarget);
+                       }
+                       targetFolder.appendMessages(targetMessages);
+
+                       String describeLast = describe(sourceMessages[sourceMessages.length - 1]);
+
+                       // free memory from fetched messages
+                       sourceFolder.close();
+                       targetFolder.close();
+
+                       sourceFolder.open(Folder.READ_ONLY);
+                       targetFolder.open(Folder.READ_WRITE);
+
+                       long duration = System.currentTimeMillis() - begin;
+                       // the range is inclusive: (end - start) would be zero for a batch of one
+                       int batchMessageCount = end - start + 1;
+                       logger.log(DEBUG, folderName + " - batch " + i + " took " + (duration / 1000) + " s, "
+                                       + (duration / batchMessageCount) + " ms per message. Last message " + describeLast);
+               }
+
+               return targetFolder;
+       }
+
+       /**
+        * Save body parts and attachments as plain files. Only multipart content is
+        * processed; nested multiparts are handled recursively. Each part is written
+        * to <code>fileBase + "." + extension</code>, where the extension is
+        * <code>html</code> or <code>txt</code> for text parts, and the name reported
+        * by the part's data handler otherwise.
+        * 
+        * @param content  the message (or part) content
+        * @param fileBase the path used as prefix for the generated file names
+        */
+       protected void savePartsAsFiles(Object content, Path fileBase) throws IOException, MessagingException {
+               if (!(content instanceof Multipart))
+                       return;
+
+               Multipart multi = (Multipart) content;
+               int parts = multi.getCount();
+               for (int j = 0; j < parts; ++j) {
+                       MimeBodyPart part = (MimeBodyPart) multi.getBodyPart(j);
+                       Object partContent = part.getContent();
+                       if (partContent instanceof Multipart) {
+                               // part-within-a-part, do some recursion...
+                               savePartsAsFiles(partContent, fileBase);
+                               continue;
+                       }
+
+                       String extension;
+                       if (part.isMimeType("text/html")) {
+                               extension = "html";
+                       } else if (part.isMimeType("text/plain")) {
+                               extension = "txt";
+                       } else {
+                               // Try to get the name of the attachment
+                               extension = part.getDataHandler().getName();
+                       }
+                       String filename = fileBase + "." + extension;
+                       System.out.println("... " + filename);
+                       // streams are scoped to the part, so that each of them is closed
+                       try (OutputStream out = new FileOutputStream(new File(filename));
+                                       InputStream in = part.getInputStream()) {
+                               in.transferTo(out);
+                       }
+               }
+       }
+
+       /** Sets the base directory; messages are written below a sub-directory named after the source username. */
+       public void setTargetBaseDir(String targetBaseDir) {
+               this.targetBaseDir = targetBaseDir;
+       }
+
+       /** Sets the host of the source server, which is accessed with the imaps protocol. */
+       public void setSourceServer(String sourceServer) {
+               this.sourceServer = sourceServer;
+       }
+
+       /** Sets the username used to connect to the source server; it also names the target sub-directory. */
+       public void setSourceUsername(String sourceUsername) {
+               this.sourceUsername = sourceUsername;
+       }
+
+       /** Sets the password used to connect to the source server. */
+       public void setSourcePassword(String sourcePassword) {
+               this.sourcePassword = sourcePassword;
+       }
+
+       public static void main(String args[]) throws Exception {
+               if (args.length < 4)
+                       throw new IllegalArgumentException(
+                                       "usage: <target base dir> <source IMAP server> <source username> <source password>");
+               String targetBaseDir = args[0];
+               String sourceServer = args[1];
+               String sourceUsername = args[2];
+               String sourcePassword = args[3];
+
+               EmailMigration emailMigration = new EmailMigration();
+               emailMigration.setTargetBaseDir(targetBaseDir);
+               emailMigration.setSourceServer(sourceServer);
+               emailMigration.setSourceUsername(sourceUsername);
+               emailMigration.setSourcePassword(sourcePassword);
+
+               emailMigration.process();
+       }
+}
diff --git a/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java
new file mode 100644 (file)
index 0000000..10bb52c
--- /dev/null
@@ -0,0 +1,117 @@
+package org.argeo.slc.mail;
+
+import java.util.Date;
+
+import javax.mail.Address;
+import javax.mail.Flags;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.internet.InternetAddress;
+import javax.mail.internet.MimeMessage;
+
+/** Utilities around emails. */
+public class EmailUtils {
+       public final static String INBOX = "Inbox";
+       public final static String MESSAGE_ID = "Message-ID";
+
+       public static String getMessageId(Message msg) {
+               try {
+                       return msg instanceof MimeMessage ? ((MimeMessage) msg).getMessageID() : "<N/A>";
+               } catch (MessagingException e) {
+                       throw new IllegalStateException("Cannot extract message id from " + msg, e);
+               }
+       }
+
+       public static String describe(Message msg) {
+               try {
+                       return "Message " + msg.getMessageNumber() + " " + msg.getSentDate().toInstant() + " " + getMessageId(msg);
+               } catch (MessagingException e) {
+                       throw new IllegalStateException("Cannot describe " + msg, e);
+               }
+       }
+
+       /**
+        * Reflects the given flags in the <code>Status</code>, <code>X-Status</code>,
+        * <code>X-Keywords</code> and <code>X-Dt-Delete-Time</code> headers of the
+        * message. This is best effort: a failure to update the headers is ignored.
+        * 
+        * @param msg   the message whose headers are updated
+        * @param flags the flags to translate into headers
+        */
+       static void setHeadersFromFlags(MimeMessage msg, Flags flags) {
+               try {
+                       StringBuilder status = new StringBuilder();
+                       if (flags.contains(Flags.Flag.SEEN))
+                               status.append('R');
+                       if (!flags.contains(Flags.Flag.RECENT))
+                               status.append('O');
+                       if (status.length() > 0)
+                               msg.setHeader("Status", status.toString());
+                       else
+                               msg.removeHeader("Status");
+
+                       String s = msg.getHeader("X-Status", null);
+                       // is it a SIMS 2.0 format X-Status header?
+                       boolean sims = s != null && s.length() == 4 && s.indexOf('$') >= 0;
+                       // start from an empty buffer, otherwise the Status characters written above
+                       // would end up in X-Status as well
+                       status.setLength(0);
+                       if (flags.contains(Flags.Flag.DELETED))
+                               status.append('D');
+                       else if (sims)
+                               status.append('$');
+                       if (flags.contains(Flags.Flag.FLAGGED))
+                               status.append('F');
+                       else if (sims)
+                               status.append('$');
+                       if (flags.contains(Flags.Flag.ANSWERED))
+                               status.append('A');
+                       else if (sims)
+                               status.append('$');
+                       if (flags.contains(Flags.Flag.DRAFT))
+                               status.append('T');
+                       else if (sims)
+                               status.append('$');
+                       if (status.length() > 0)
+                               msg.setHeader("X-Status", status.toString());
+                       else
+                               msg.removeHeader("X-Status");
+
+                       String[] userFlags = flags.getUserFlags();
+                       if (userFlags.length > 0) {
+                               // user flags are written space-separated
+                               msg.setHeader("X-Keywords", String.join(" ", userFlags));
+                       }
+                       if (flags.contains(Flags.Flag.DELETED)) {
+                               s = msg.getHeader("X-Dt-Delete-Time", null);
+                               if (s == null)
+                                       // XXX - should be time
+                                       msg.setHeader("X-Dt-Delete-Time", "1");
+                       }
+               } catch (MessagingException ignored) {
+                       // deliberately ignored: header synchronisation is best effort
+               }
+       }
+
+    /**
+     * Builds the mbox <code>From </code> separator line for a message: the
+     * address of the first sender followed by a date without time zone. The
+     * received date is used if available, then the sent date, then the current
+     * time. The sender is <code>UNKNOWN</code> if there is no usable address, and
+     * both values fall back to their defaults if the message cannot be read.
+     * 
+     * @param msg the message to describe
+     * @return the separator line, without line terminator
+     */
+    protected static String getUnixFrom(MimeMessage msg) {
+        String from = "UNKNOWN";
+        Date ddate = null;
+        try {
+            Address[] afrom = msg.getFrom();
+            // guard against an empty sender list as well as a missing one
+            if (afrom != null && afrom.length > 0 && afrom[0] instanceof InternetAddress) {
+                String address = ((InternetAddress) afrom[0]).getAddress();
+                if (address != null)
+                    from = address;
+            }
+            ddate = msg.getReceivedDate();
+            if (ddate == null)
+                ddate = msg.getSentDate();
+        } catch (MessagingException e) {
+            from = "UNKNOWN";
+            ddate = null;
+        }
+        if (ddate == null)
+            ddate = new Date();
+        String date = ddate.toString();
+        // date is of the form "Sat Aug 12 02:30:00 PDT 1995"
+        // need to strip out the timezone, whose length is not fixed: keep
+        // everything up to the time, then the year found after the last space
+        return "From " + from + " " +
+                date.substring(0, 20) + date.substring(date.lastIndexOf(' ') + 1);
+    }
+
+       /** Utility class with static members only, not meant to be instantiated. */
+       private EmailUtils() {
+       }
+}