From e7904d760b090538d111a7b811b6a76454c6ef3a Mon Sep 17 00:00:00 2001
From: Mathieu Baudier
Date: Sun, 1 May 2022 08:55:07 +0200
Subject: [PATCH] Introduce email migration tool

---
 org.argeo.slc.mail/.classpath                 |   7 +
 org.argeo.slc.mail/.gitignore                 |   1 +
 org.argeo.slc.mail/.project                   |  28 ++
 org.argeo.slc.mail/META-INF/MANIFEST.MF       |  15 +
 org.argeo.slc.mail/build.properties           |   4 +
 .../org/argeo/slc/mail/EmailMigration.java    | 416 ++++++++++++++++++
 .../src/org/argeo/slc/mail/EmailUtils.java    | 143 ++++++
 7 files changed, 614 insertions(+)
 create mode 100644 org.argeo.slc.mail/.classpath
 create mode 100644 org.argeo.slc.mail/.gitignore
 create mode 100644 org.argeo.slc.mail/.project
 create mode 100644 org.argeo.slc.mail/META-INF/MANIFEST.MF
 create mode 100644 org.argeo.slc.mail/build.properties
 create mode 100644 org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java
 create mode 100644 org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java

diff --git a/org.argeo.slc.mail/.classpath b/org.argeo.slc.mail/.classpath
new file mode 100644
index 000000000..81fe078c2
--- /dev/null
+++ b/org.argeo.slc.mail/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/org.argeo.slc.mail/.gitignore b/org.argeo.slc.mail/.gitignore
new file mode 100644
index 000000000..27a916263
--- /dev/null
+++ b/org.argeo.slc.mail/.gitignore
@@ -0,0 +1 @@
+!**/MANIFEST.MF
\ No newline at end of file
diff --git a/org.argeo.slc.mail/.project b/org.argeo.slc.mail/.project
new file mode 100644
index 000000000..75b09ab42
--- /dev/null
+++ b/org.argeo.slc.mail/.project
@@ -0,0 +1,28 @@
+
+
+ org.argeo.slc.mail
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.pde.ManifestBuilder
+
+
+
+
+ org.eclipse.pde.SchemaBuilder
+
+
+
+
+
+ org.eclipse.pde.PluginNature
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/org.argeo.slc.mail/META-INF/MANIFEST.MF b/org.argeo.slc.mail/META-INF/MANIFEST.MF
new file mode 100644
index 000000000..6cb81f228
--- /dev/null
+++ b/org.argeo.slc.mail/META-INF/MANIFEST.MF
@@ -0,0 +1,15 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: Mail
+Bundle-SymbolicName: org.argeo.slc.mail
+Bundle-Version: 1.0.0.qualifier
+Automatic-Module-Name: org.argeo.slc.mail
+Bundle-RequiredExecutionEnvironment: JavaSE-17
+Import-Package: com.sun.mail.imap;version="1.6.2",
+ com.sun.mail.mbox;version="1.6.7",
+ javax.activation,
+ javax.mail;version="1.6.0",
+ javax.mail.event;version="1.6.0",
+ javax.mail.internet;version="1.6.0",
+ javax.mail.search;version="1.6.0",
+ javax.mail.util;version="1.6.0"
diff --git a/org.argeo.slc.mail/build.properties b/org.argeo.slc.mail/build.properties
new file mode 100644
index 000000000..34d2e4d2d
--- /dev/null
+++ b/org.argeo.slc.mail/build.properties
@@ -0,0 +1,4 @@
+source.. = src/
+output.. = bin/
+bin.includes = META-INF/,\
+               .
diff --git a/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java
new file mode 100644
index 000000000..378d50cc2
--- /dev/null
+++ b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailMigration.java
@@ -0,0 +1,416 @@
+package org.argeo.slc.mail;
+
+import static java.lang.System.Logger.Level.DEBUG;
+import static java.lang.System.Logger.Level.ERROR;
+import static org.argeo.slc.mail.EmailUtils.describe;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.System.Logger;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.time.Instant;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Properties;
+
+import javax.mail.FetchProfile;
+import javax.mail.Folder;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.Multipart;
+import javax.mail.Session;
+import javax.mail.Store;
+import javax.mail.URLName;
+import javax.mail.internet.InternetHeaders;
+import javax.mail.internet.MimeBodyPart;
+import javax.mail.internet.MimeMessage;
+import javax.mail.search.HeaderTerm;
+import javax.mail.util.SharedFileInputStream;
+
+import com.sun.mail.imap.IMAPFolder;
+import com.sun.mail.mbox.MboxFolder;
+import com.sun.mail.mbox.MboxMessage;
+
+/**
+ * Migrates emails from one storage to another one: the Inbox of an IMAPS
+ * account is appended to a local mbox file, resuming after the last message
+ * which was already migrated.
+ */
+public class EmailMigration {
+	private final static Logger logger = System.getLogger(EmailMigration.class.getName());
+
+	/** Number of messages fetched and appended at once. */
+	private final static int BATCH_SIZE = 100;
+
+	private String targetBaseDir;
+	private String sourceServer;
+	private String sourceUsername;
+	private String sourcePassword;
+
+	/**
+	 * Connects to the source server over IMAPS and migrates its Inbox below the
+	 * directory {@code targetBaseDir/sourceUsername}.
+	 *
+	 * @throws IllegalStateException if one of the properties has not been set
+	 * @throws MessagingException    if a mail store or folder cannot be accessed
+	 * @throws IOException           if local files cannot be written
+	 */
+	public void process() throws MessagingException, IOException {
+		if (targetBaseDir == null || sourceServer == null || sourceUsername == null || sourcePassword == null)
+			throw new IllegalStateException("Target base dir, source server, username and password must be set");
+		Path baseDir = Paths.get(targetBaseDir).resolve(sourceUsername);
+
+		// work on a copy, so that the JVM-wide system properties are not modified
+		Properties sourceProperties = new Properties();
+		sourceProperties.putAll(System.getProperties());
+		sourceProperties.setProperty("mail.store.protocol", "imaps");
+
+		// getInstance() rather than getDefaultInstance(), which ignores the
+		// properties when a default session already exists in this JVM
+		Session sourceSession = Session.getInstance(sourceProperties, null);
+		Store sourceStore = sourceSession.getStore("imaps");
+		try {
+			sourceStore.connect(sourceServer, sourceUsername, sourcePassword);
+
+			// Always start with Inbox
+			Folder inboxFolder = sourceStore.getFolder(EmailUtils.INBOX);
+			migrateFolder(baseDir, inboxFolder);
+		} finally {
+			sourceStore.close();
+		}
+	}
+
+	/**
+	 * Opens the source folder read-only, migrates its messages, then closes the
+	 * source and target folders.
+	 *
+	 * @param baseDir      the directory where the migrated data is written
+	 * @param sourceFolder the (closed) folder to migrate
+	 */
+	protected void migrateFolder(Path baseDir, Folder sourceFolder) throws MessagingException, IOException {
+		String folderName = sourceFolder.getName();
+		sourceFolder.open(Folder.READ_ONLY);
+
+		Folder targetFolder = null;
+		try {
+			int messageCount = sourceFolder.getMessageCount();
+			logger.log(DEBUG, folderName + " - Message count : " + messageCount);
+
+			// alternative mode, currently disabled: save the message parts as plain files
+			boolean saveAsFiles = false;
+
+			if (saveAsFiles) {
+				Files.createDirectories(baseDir);
+				Message messages[] = sourceFolder.getMessages();
+				for (int i = 0; i < messages.length; ++i) {
+					Message msg = messages[i];
+					Date sent = msg.getSentDate();
+					// messages without a sent date are filed under the epoch
+					Instant sentDate = sent != null ? sent.toInstant() : Instant.EPOCH;
+					// the subject may contain path separators
+					String fileName = (sentDate + " " + msg.getSubject()).replace('/', '_').replace('\\', '_');
+					savePartsAsFiles(msg.getContent(), baseDir.resolve(fileName));
+				}
+			} else {
+				long begin = System.currentTimeMillis();
+				targetFolder = migrateFolderToMbox(baseDir, sourceFolder);
+				long duration = System.currentTimeMillis() - begin;
+				// an empty folder must not cause a division by zero
+				long perMessage = messageCount > 0 ? duration / messageCount : 0;
+				logger.log(DEBUG, folderName + " - Migration of " + messageCount + " messages took " + (duration / 1000)
+						+ " s (" + perMessage + " ms per message)");
+			}
+		} finally {
+			// make sure that the target folder is closed even if closing the source fails
+			try {
+				if (sourceFolder.isOpen())
+					sourceFolder.close();
+			} finally {
+				if (targetFolder != null && targetFolder.isOpen())
+					targetFolder.close();
+			}
+		}
+	}
+
+	/**
+	 * Appends the messages of the source folder to the mbox file
+	 * {@code baseDir/mbox/folderName}, starting after the last message already
+	 * present in this file. Both folders are closed and reopened after each batch.
+	 *
+	 * @param baseDir      the directory below which the mbox file is located
+	 * @param sourceFolder the open folder to migrate
+	 * @return the target mbox folder, open; the caller has to close it
+	 */
+	protected Folder migrateFolderToMbox(Path baseDir, Folder sourceFolder) throws MessagingException, IOException {
+		String folderName = sourceFolder.getName();
+
+		Path targetDir = baseDir.resolve("mbox");
+		Files.createDirectories(targetDir);
+		Path targetPath = targetDir.resolve(folderName);
+		if (!Files.exists(targetPath))
+			Files.createFile(targetPath);
+		URLName targetUrlName = new URLName("mbox:" + targetPath.toString());
+		// a dedicated session, since getDefaultInstance() may return an existing
+		// session and ignore the provided properties
+		Session targetSession = Session.getInstance(new Properties());
+		Folder targetFolder = targetSession.getFolder(targetUrlName);
+		targetFolder.open(Folder.READ_WRITE);
+		try {
+			int lastSourceNumber = findLastMigratedNumber(sourceFolder, targetFolder);
+			logger.log(DEBUG, "Last source message number " + lastSourceNumber);
+			int lastToRetrieve = sourceFolder.getMessageCount();
+
+			FetchProfile fetchProfile = new FetchProfile();
+			fetchProfile.add(FetchProfile.Item.FLAGS);
+			fetchProfile.add(FetchProfile.Item.ENVELOPE);
+			fetchProfile.add(FetchProfile.Item.CONTENT_INFO);
+			fetchProfile.add(FetchProfile.Item.SIZE);
+			if (sourceFolder instanceof IMAPFolder) {
+				fetchProfile.add(IMAPFolder.FetchProfileItem.HEADERS);
+				fetchProfile.add(IMAPFolder.FetchProfileItem.MESSAGE);
+			}
+
+			int batch = 0;
+			for (int start = lastSourceNumber + 1; start <= lastToRetrieve; start += BATCH_SIZE) {
+				long begin = System.currentTimeMillis();
+
+				int end = Math.min(start + BATCH_SIZE - 1, lastToRetrieve);
+				Message[] sourceMessages = sourceFolder.getMessages(start, end);
+				sourceFolder.fetch(sourceMessages, fetchProfile);
+				appendToMbox(sourceMessages, (MboxFolder) targetFolder);
+
+				String describeLast = describe(sourceMessages[sourceMessages.length - 1]);
+
+				// close and reopen the folders in order to free the memory used by
+				// the fetched messages
+				sourceFolder.close();
+				targetFolder.close();
+				sourceFolder.open(Folder.READ_ONLY);
+				targetFolder.open(Folder.READ_WRITE);
+
+				long duration = System.currentTimeMillis() - begin;
+				// divide by the actual batch size: (end - start) is zero for a single message
+				logger.log(DEBUG, folderName + " - batch " + batch + " took " + (duration / 1000) + " s, "
+						+ (duration / sourceMessages.length) + " ms per message. Last message " + describeLast);
+				batch++;
+			}
+		} catch (MessagingException | IOException | RuntimeException e) {
+			// on failure the caller never gets the target folder, so it is closed here
+			try {
+				if (targetFolder.isOpen())
+					targetFolder.close();
+			} catch (MessagingException | RuntimeException closeException) {
+				e.addSuppressed(closeException);
+			}
+			throw e;
+		}
+		return targetFolder;
+	}
+
+	/**
+	 * Looks up in the source folder the last message of the target folder, based
+	 * on its Message-ID header.
+	 *
+	 * @return the number of this message in the source folder, or 0 if the target
+	 *         folder is empty
+	 * @throws IllegalStateException if the message cannot be identified
+	 *                               unambiguously in the source folder
+	 */
+	private int findLastMigratedNumber(Folder sourceFolder, Folder targetFolder) throws MessagingException {
+		int currentTargetMessageCount = targetFolder.getMessageCount();
+		if (currentTargetMessageCount == 0)
+			return 0;
+
+		MimeMessage lastTargetMessage = (MimeMessage) targetFolder.getMessage(currentTargetMessageCount);
+		logger.log(DEBUG, "Last target message " + describe(lastTargetMessage));
+		String lastMessageId = lastTargetMessage.getMessageID();
+		if (lastMessageId == null)
+			throw new IllegalStateException("Last target message has no message ID");
+
+		Message[] lastSourceMessage = sourceFolder.search(new HeaderTerm(EmailUtils.MESSAGE_ID, lastMessageId));
+		if (lastSourceMessage.length == 0)
+			throw new IllegalStateException("No message found with message ID " + lastMessageId);
+		if (lastSourceMessage.length != 1) {
+			for (Message msg : lastSourceMessage) {
+				logger.log(ERROR, "Message " + describe(msg));
+			}
+			Date lastTargetReceived = lastTargetMessage.getReceivedDate();
+			throw new IllegalStateException(lastSourceMessage.length + " messages found with message ID "
+					+ lastMessageId + " (last target message received on " + lastTargetReceived + ")");
+		}
+		return lastSourceMessage[0].getMessageNumber();
+	}
+
+	/**
+	 * Converts the source messages to mbox messages and appends them to the
+	 * target folder. The message bodies transit via temporary files, which are
+	 * removed before returning.
+	 */
+	private void appendToMbox(Message[] sourceMessages, MboxFolder targetFolder)
+			throws MessagingException, IOException {
+		Message[] targetMessages = new Message[sourceMessages.length];
+		Path[] bodyFiles = new Path[sourceMessages.length];
+		SharedFileInputStream[] bodyStreams = new SharedFileInputStream[sourceMessages.length];
+		try {
+			for (int j = 0; j < sourceMessages.length; j++) {
+				MimeMessage sourceMm = (MimeMessage) sourceMessages[j];
+				InternetHeaders ih = new InternetHeaders();
+				for (Enumeration<String> e = sourceMm.getAllHeaderLines(); e.hasMoreElements();) {
+					ih.addHeaderLine(e.nextElement());
+				}
+
+				bodyFiles[j] = Files.createTempFile("argeo-mbox-target", ".txt");
+				writeEscapedBody(sourceMm, bodyFiles[j]);
+				bodyStreams[j] = new SharedFileInputStream(bodyFiles[j].toFile());
+				targetMessages[j] = new MboxMessage(targetFolder, ih, bodyStreams[j], sourceMm.getMessageNumber(),
+						EmailUtils.getUnixFrom(sourceMm), true);
+			}
+			targetFolder.appendMessages(targetMessages);
+		} finally {
+			// streams and files are released only once the messages have been appended,
+			// and also when the batch fails half-way
+			for (int j = 0; j < sourceMessages.length; j++) {
+				try {
+					if (bodyStreams[j] != null)
+						bodyStreams[j].close();
+					if (bodyFiles[j] != null)
+						Files.deleteIfExists(bodyFiles[j]);
+				} catch (IOException e) {
+					// do not hide the outcome of the migration itself
+					logger.log(ERROR, "Cannot clean up " + bodyFiles[j], e);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Writes the raw body of the message to the target file, prefixing with '>'
+	 * the lines starting with "From ", which would otherwise be read as message
+	 * separators in the mbox file.
+	 */
+	private void writeEscapedBody(MimeMessage sourceMm, Path target) throws MessagingException, IOException {
+		Path tmpFileSource = Files.createTempFile("argeo-mbox-source", ".txt");
+		try {
+			try (InputStream rawIn = sourceMm.getRawInputStream()) {
+				Files.copy(rawIn, tmpFileSource, StandardCopyOption.REPLACE_EXISTING);
+			}
+
+			// we use ISO_8859_1 because it is more robust than US_ASCII with regard to
+			// missing characters
+			try (BufferedReader reader = Files.newBufferedReader(tmpFileSource, StandardCharsets.ISO_8859_1);
+					BufferedWriter writer = Files.newBufferedWriter(target, StandardCharsets.ISO_8859_1)) {
+				int lineNumber = 0;
+				String line = null;
+				try {
+					while ((line = reader.readLine()) != null) {
+						lineNumber++;
+						if (line.startsWith("From ")) {
+							writer.write(">" + line);
+							logger.log(DEBUG, "Fix line " + lineNumber + " in " + describe(sourceMm) + ": " + line);
+						} else {
+							writer.write(line);
+						}
+						writer.newLine();
+					}
+				} catch (IOException e) {
+					logger.log(ERROR, "Error around line " + lineNumber + " of message " + sourceMm.getMessageNumber());
+					throw e;
+				}
+			}
+		} finally {
+			Files.deleteIfExists(tmpFileSource);
+		}
+	}
+
+	/**
+	 * Save body parts and attachments as plain files. Content which is not
+	 * multipart is ignored.
+	 *
+	 * @param content  the content of a message or of a nested body part
+	 * @param fileBase the path of the files to write, without extension
+	 */
+	protected void savePartsAsFiles(Object content, Path fileBase) throws IOException, MessagingException {
+		if (!(content instanceof Multipart))
+			return;
+
+		Multipart multi = (Multipart) content;
+		int parts = multi.getCount();
+		for (int j = 0; j < parts; ++j) {
+			MimeBodyPart part = (MimeBodyPart) multi.getBodyPart(j);
+			Object partContent = part.getContent();
+			if (partContent instanceof Multipart) {
+				// part-within-a-part, do some recursion...
+				savePartsAsFiles(partContent, fileBase);
+				continue;
+			}
+
+			String extension;
+			if (part.isMimeType("text/html")) {
+				extension = "html";
+			} else if (part.isMimeType("text/plain")) {
+				extension = "txt";
+			} else {
+				// Try to get the name of the attachment
+				extension = part.getDataHandler().getName();
+				if (extension == null)
+					extension = "part" + j;
+			}
+			String filename = fileBase + "." + extension;
+			logger.log(DEBUG, "... " + filename);
+			// each part gets its own streams, closed before moving on to the next part
+			try (InputStream in = part.getInputStream();
+					OutputStream out = new FileOutputStream(new File(filename))) {
+				in.transferTo(out);
+			}
+		}
+	}
+
+	public void setTargetBaseDir(String targetBaseDir) {
+		this.targetBaseDir = targetBaseDir;
+	}
+
+	public void setSourceServer(String sourceServer) {
+		this.sourceServer = sourceServer;
+	}
+
+	public void setSourceUsername(String sourceUsername) {
+		this.sourceUsername = sourceUsername;
+	}
+
+	public void setSourcePassword(String sourcePassword) {
+		this.sourcePassword = sourcePassword;
+	}
+
+	/**
+	 * Command line entry point.
+	 *
+	 * @param args the target base directory, the source server, the source
+	 *             username and the source password, in this order
+	 */
+	public static void main(String args[]) throws Exception {
+		if (args.length < 4)
+			throw new IllegalArgumentException(
+					"usage: <target base dir> <source server> <source username> <source password>");
+		String targetBaseDir = args[0];
+		String sourceServer = args[1];
+		String sourceUsername = args[2];
+		String sourcePassword = args[3];
+
+		EmailMigration emailMigration = new EmailMigration();
+		emailMigration.setTargetBaseDir(targetBaseDir);
+		emailMigration.setSourceServer(sourceServer);
+		emailMigration.setSourceUsername(sourceUsername);
+		emailMigration.setSourcePassword(sourcePassword);
+
+		emailMigration.process();
+	}
+}
diff --git a/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java
new file mode 100644
index 000000000..10bb52c9a
--- /dev/null
+++ b/org.argeo.slc.mail/src/org/argeo/slc/mail/EmailUtils.java
@@ -0,0 +1,143 @@
+package org.argeo.slc.mail;
+
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.Locale;
+
+import javax.mail.Address;
+import javax.mail.Flags;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.internet.InternetAddress;
+import javax.mail.internet.MimeMessage;
+
+/** Utilities around emails. */
+public class EmailUtils {
+	public final static String INBOX = "Inbox";
+	public final static String MESSAGE_ID = "Message-ID";
+
+	/** Date format of the mbox "From " line, e.g. "Sat Aug 12 02:30:00 1995". */
+	private final static DateTimeFormatter UNIX_FROM_DATE = DateTimeFormatter
+			.ofPattern("EEE MMM dd HH:mm:ss yyyy", Locale.US);
+
+	/**
+	 * @return the message ID of a MIME message (as returned by
+	 *         {@link MimeMessage#getMessageID()}), or an empty string for other
+	 *         kinds of messages
+	 */
+	public static String getMessageId(Message msg) {
+		try {
+			return msg instanceof MimeMessage ? ((MimeMessage) msg).getMessageID() : "";
+		} catch (MessagingException e) {
+			throw new IllegalStateException("Cannot extract message id from " + msg, e);
+		}
+	}
+
+	/** Describes a message with its number, sent date and message ID, typically for logging. */
+	public static String describe(Message msg) {
+		try {
+			Date sentDate = msg.getSentDate();
+			// the sent date may be unavailable
+			String sent = sentDate != null ? sentDate.toInstant().toString() : "(no sent date)";
+			return "Message " + msg.getMessageNumber() + " " + sent + " " + getMessageId(msg);
+		} catch (MessagingException e) {
+			throw new IllegalStateException("Cannot describe " + msg, e);
+		}
+	}
+
+	/**
+	 * Sets the Status, X-Status, X-Keywords and X-Dt-Delete-Time headers of a
+	 * message based on the flags. This is best effort: a failure to update the
+	 * headers is ignored.
+	 */
+	static void setHeadersFromFlags(MimeMessage msg, Flags flags) {
+		try {
+			StringBuilder status = new StringBuilder();
+			if (flags.contains(Flags.Flag.SEEN))
+				status.append('R');
+			if (!flags.contains(Flags.Flag.RECENT))
+				status.append('O');
+			if (status.length() > 0)
+				msg.setHeader("Status", status.toString());
+			else
+				msg.removeHeader("Status");
+
+			String s = msg.getHeader("X-Status", null);
+			// is it a SIMS 2.0 format X-Status header?
+			boolean sims = s != null && s.length() == 4 && s.indexOf('$') >= 0;
+			// start from scratch, otherwise X-Status would repeat the Status characters
+			status.setLength(0);
+			if (flags.contains(Flags.Flag.DELETED))
+				status.append('D');
+			else if (sims)
+				status.append('$');
+			if (flags.contains(Flags.Flag.FLAGGED))
+				status.append('F');
+			else if (sims)
+				status.append('$');
+			if (flags.contains(Flags.Flag.ANSWERED))
+				status.append('A');
+			else if (sims)
+				status.append('$');
+			if (flags.contains(Flags.Flag.DRAFT))
+				status.append('T');
+			else if (sims)
+				status.append('$');
+			if (status.length() > 0)
+				msg.setHeader("X-Status", status.toString());
+			else
+				msg.removeHeader("X-Status");
+
+			String[] userFlags = flags.getUserFlags();
+			if (userFlags.length > 0) {
+				status.setLength(0);
+				for (int i = 0; i < userFlags.length; i++)
+					status.append(userFlags[i]).append(' ');
+				status.setLength(status.length() - 1); // smash trailing space
+				msg.setHeader("X-Keywords", status.toString());
+			}
+			if (flags.contains(Flags.Flag.DELETED)) {
+				s = msg.getHeader("X-Dt-Delete-Time", null);
+				if (s == null)
+					// XXX - should be time
+					msg.setHeader("X-Dt-Delete-Time", "1");
+			}
+		} catch (MessagingException ignored) {
+			// deliberately ignored, the headers are set on a best effort basis
+		}
+	}
+
+	/**
+	 * Builds the "From " line which precedes a message in an mbox file, with the
+	 * address of the first sender and the received date, falling back on the
+	 * sent date, then on the current date.
+	 */
+	protected static String getUnixFrom(MimeMessage msg) {
+		String from = null;
+		Date date = null;
+		try {
+			Address[] afrom = msg.getFrom();
+			// there may be no sender at all
+			if (afrom != null && afrom.length > 0 && afrom[0] instanceof InternetAddress)
+				from = ((InternetAddress) afrom[0]).getAddress();
+			date = msg.getReceivedDate();
+			if (date == null)
+				date = msg.getSentDate();
+		} catch (MessagingException e) {
+			// fall back on the default values below
+			from = null;
+			date = null;
+		}
+		if (from == null)
+			from = "UNKNOWN";
+		if (date == null)
+			date = new Date();
+		// no time zone in the date: its abbreviation is not always three characters long
+		return "From " + from + " " + UNIX_FROM_DATE.format(date.toInstant().atZone(ZoneId.systemDefault()));
+	}
+
+	/** Utility class, not meant to be instantiated. */
+	private EmailUtils() {
+	}
+}
-- 
2.39.5