DSpace清理已刪除Bitstream的cleanup時出現java.lang.OutOfMemoryError: Java heap space的處理方法
DSpace中刪除的Bitstream(檔案)只會在資料庫中標示為deleted = 1,並不會從檔案系統中移除。換句話說,檔案還留在伺服器上,佔據硬碟空間。如果要真正地刪除檔案,必須使用[dspace]/bin/cleanup指令。關於Bitstream Store(檔案儲存)的知識在DSpace的說明書中已經有提及,在此只是稍微回顧一下。
Bitstream的清理主要是使用BitstreamStorageManager.cleanup(),它會從資料庫中取出所有被標示為deleted = 1的Bitstream,並一一地從檔案系統中刪除。但是當deleted = 1的Bitstream數量太多時,就會出現「java.lang.OutOfMemoryError: Java heap space」錯誤。
簡單來說,這是由於記憶體不足,DSpace無法一次處理從資料庫取出的大量資料所造成的錯誤。處理這個問題的方法有兩種,一種是加大Java可用的記憶體,另一種是變更資料庫查詢的方式。我認為加大Java可用記憶體的方法是治標不治本,因為總是會有挑戰記憶體上限的資料量出現。我建議的是修改查詢資料庫的方法,設定offset跟limit,一次查詢部分資料,然後分多次查詢進行。
因此我修改了BitstreamStorageManager.java,在此也順便提供給有遇到類似問題的人使用:
- 下載BitstreamStorageManager.java (注意,這是DSpace 1.5.1的版本,如果你使用不同版本的話,並不建議直接下載覆蓋,而是請參考下面的說明)
- 放至[dspace-src]/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageManager.java
這個檔案主要是修改了cleanup方法,方法如上述。詳細的程式碼如下:
/** * Clean up the bitstream storage area. This method deletes any bitstreams * which are more than 1 hour old and marked deleted. The deletions cannot * be undone. * * @param deleteDbRecords if true deletes the database records otherwise it * only deletes the files and directories in the assetstore * @exception IOException * If a problem occurs while cleaning up * @exception SQLException * If a problem occurs accessing the RDBMS */ public static void cleanup(boolean deleteDbRecords) throws SQLException, IOException { Context context = null; BitstreamInfoDAO bitstreamInfoDAO = new BitstreamInfoDAO(); int commit_counter = 0; try { context = new Context(); int queryBitstreamNumber = 10; int queryBitstreamIndex = 0; int queryBitstreamInterval = 10; while (queryBitstreamNumber > 0) { String myQuery = "select * from Bitstream where deleted = '1' offset " + queryBitstreamIndex + " limit " + queryBitstreamInterval; List storage = DatabaseManager.queryTable(context, "Bitstream", myQuery) .toList(); queryBitstreamNumber = storage.size(); if (queryBitstreamNumber == 0) break; for (Iterator iterator = storage.iterator(); iterator.hasNext();) { TableRow row = (TableRow) iterator.next(); int bid = row.getIntColumn("bitstream_id"); System.out.println("Ready to Bitsteam (" + bid + ")..."); GeneralFile file = getFile(row); // Make sure entries which do not exist are removed if (file == null || !file.exists()) { log.debug("file is null"); if (deleteDbRecords) { log.debug("deleting record"); bitstreamInfoDAO.deleteBitstreamInfoWithHistory(bid); DatabaseManager.delete(context, "Bitstream", bid); } System.out.println("File not exists, continue."); continue; } // This is a small chance that this is a file which is // being stored -- get it next time. 
if (isRecent(file)) { log.debug("file is recent"); System.out.println("File is recent."); continue; } if (deleteDbRecords) { log.debug("deleting db record"); bitstreamInfoDAO.deleteBitstreamInfoWithHistory(bid); DatabaseManager.delete(context, "Bitstream", bid); System.out.println("Deleting db record."); } if (isRegisteredBitstream(row.getStringColumn("internal_id"))) { System.out.println("do not delete registered bitstreams"); continue; // do not delete registered bitstreams } boolean success = file.delete(); if (log.isDebugEnabled()) { log.debug("Deleted bitstream " + bid + " (file " + file.getAbsolutePath() + ") with result " + success); System.out.println("Deleted bitstream " + bid + " (file " + file.getAbsolutePath() + ") with result " + success); } // if the file was deleted then // try deleting the parents // Otherwise the cleanup script is set to // leave the db records then the file // and directories have already been deleted // if this is turned off then it still looks like the // file exists if( success ) { deleteParents(file); } // Make sure to commit our outstanding work every 100 // iterations. Otherwise you risk losing the entire transaction // if we hit an exception, which isn't useful at all for large // amounts of bitstreams. commit_counter++; if (commit_counter % 100 == 0) { context.commit(); System.out.println("commit"); } } queryBitstreamIndex = queryBitstreamIndex + queryBitstreamInterval; } //while (queryBitstreamNumber > 0) context.complete(); } // Aborting will leave the DB objects around, even if the // bitstreams are deleted. This is OK; deleting them next // time around will be a no-op. catch (SQLException sqle) { context.abort(); throw sqle; } catch (IOException ioe) { context.abort(); throw ioe; } }