General cleanup

- Move more settings to indexing profile and settings - Removed unused imports and functions - cleanup warnings - Remove unused code
PTFS-Europe · Apr 16, 2024 · 931311a · 931311a
1 parent c348b51
commit 931311a
Show file tree

Hide file tree

Showing 11 changed files with 69 additions and 238 deletions.
diff --git a/code/events_indexer/src/com/turning_leaf_technologies/events/CommunicoIndexer.java b/code/events_indexer/src/com/turning_leaf_technologies/events/CommunicoIndexer.java
@@ -33,7 +33,6 @@
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.time.LocalDate;
-import java.time.temporal.TemporalAdjusters;
 import java.util.*;
 import java.util.Date;
 import java.util.zip.CRC32;

diff --git a/...ava_shared_libraries/src/com/turning_leaf_technologies/indexing/BaseIndexingSettings.java b/...ava_shared_libraries/src/com/turning_leaf_technologies/indexing/BaseIndexingSettings.java
@@ -27,6 +27,7 @@ public class BaseIndexingSettings {
 	String treatUnknownLanguageAs;
 	String treatUndeterminedLanguageAs;
 	String customMarcFieldsToIndexAsKeyword;
+	boolean includePersonalAndCorporateNamesInTopics;
 
 	static char getCharFromRecordSet(ResultSet indexingProfilesRS, String fieldName) throws SQLException {
 		String subfieldString = indexingProfilesRS.getString(fieldName);
@@ -124,4 +125,12 @@ public String getTreatUndeterminedLanguageAs() {
 	public void setTreatUndeterminedLanguageAs(String treatUndeterminedLanguageAs) {
 		this.treatUndeterminedLanguageAs = treatUndeterminedLanguageAs;
 	}
+
+	public boolean isIncludePersonalAndCorporateNamesInTopics() {
+		return includePersonalAndCorporateNamesInTopics;
+	}
+
+	public void setIncludePersonalAndCorporateNamesInTopics(boolean includePersonalAndCorporateNamesInTopics) {
+		this.includePersonalAndCorporateNamesInTopics = includePersonalAndCorporateNamesInTopics;
+	}
 }
diff --git a/code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/IndexingProfile.java b/code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/IndexingProfile.java
@@ -220,6 +220,8 @@ public IndexingProfile(ResultSet indexingProfileRS, BaseIndexingLogEntry logEntr
 		this.hideUnknownLiteraryForm = indexingProfileRS.getBoolean("hideUnknownLiteraryForm");
 		this.hideNotCodedLiteraryForm = indexingProfileRS.getBoolean("hideNotCodedLiteraryForm");
 
+		this.includePersonalAndCorporateNamesInTopics = indexingProfileRS.getBoolean("includePersonalAndCorporateNamesInTopics");
+
 		this.setNoteSubfield(getCharFromRecordSet(indexingProfileRS, "noteSubfield"));
 
 		this.setLastUpdateOfChangedRecords(indexingProfileRS.getLong("lastUpdateOfChangedRecords"));

diff --git a/code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/SideLoadSettings.java b/code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/SideLoadSettings.java
@@ -28,6 +28,7 @@ public SideLoadSettings(ResultSet settings) throws SQLException {
 		this.specifiedFormatCategory = settings.getString("specifiedFormatCategory");
 		this.specifiedFormatBoost = settings.getInt("specifiedFormatBoost");
 		this.treatUnknownLanguageAs = settings.getString("treatUnknownLanguageAs");
+		this.includePersonalAndCorporateNamesInTopics = settings.getBoolean("includePersonalAndCorporateNamesInTopics");
 
 		String deletedIdString = settings.getString("deletedRecordsIds");
 		if (deletedIdString != null && deletedIdString.trim().length() > 0) {

diff --git a/code/java_shared_libraries/src/com/turning_leaf_technologies/marc/MarcUtil.java b/code/java_shared_libraries/src/com/turning_leaf_technologies/marc/MarcUtil.java
@@ -35,11 +35,11 @@
  * Class to handle loading data from MARC records
  */
 public class MarcUtil {
-	private static HashMap<String, Set<String>> marcRecordFieldListCache = new HashMap<>();
+	private static final HashMap<String, Set<String>> marcRecordFieldListCache = new HashMap<>();
 	private static long lastRecordHashCode;
 	/**
 	 * Get Set of Strings as indicated by tagStr. For each field spec in the
-	 * tagStr that is NOT about bytes (i.e. not a 008[7-12] type fieldspec), the
+	 * tagStr that is NOT about bytes (i.e. not a 008[7-12] type field spec), the
 	 * result string is the concatenation of all the specific subfields.
 	 *
 	 * @param record
@@ -87,24 +87,24 @@ public static Set<String> getFieldList(org.marc4j.marc.Record record, String tag
 			// Process Subfields
 			String subfield = tag1.substring(3);
 			boolean havePattern = false;
-			int subend = 0;
+			int subfieldEnd = 0;
 			// brackets indicate parsing for individual characters or as pattern
 			int bracket = tag1.indexOf('[');
 			if (bracket != -1) {
-				String[] sub = tag1.substring(bracket + 1).split("[\\]\\[\\-, ]+");
+				String[] sub = tag1.substring(bracket + 1).split("[]\\[\\-, ]+");
 				try {
 					// if bracket expression is digits, expression is treated as character
 					// positions
-					int substart = Integer.parseInt(sub[0]);
-					subend = (sub.length > 1) ? Integer.parseInt(sub[1]) + 1 : substart + 1;
-					String subfieldWObracket = subfield.substring(0, bracket - 3);
-					result.addAll(getSubfieldDataAsSet(record, tag, subfieldWObracket, substart, subend));
+					int subfieldStart = Integer.parseInt(sub[0]);
+					subfieldEnd = (sub.length > 1) ? Integer.parseInt(sub[1]) + 1 : subfieldStart + 1;
+					String subfieldWithoutBracket = subfield.substring(0, bracket - 3);
+					result.addAll(getSubfieldDataAsSet(record, tag, subfieldWithoutBracket, subfieldStart, subfieldEnd));
 				} catch (NumberFormatException e) {
 					// assume brackets expression is a pattern such as [a-z]
 					havePattern = true;
 				}
 			}
-			if (subend == 0) // don't want specific characters.
+			if (subfieldEnd == 0) // don't want specific characters.
 			{
 				String separator = null;
 				if (subfield.indexOf('\'') != -1) {
@@ -162,17 +162,16 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
 		}
 
 		// Loop through Data and Control Fields
-		List<VariableField> varFlds = record.getVariableFields(fldTag);
-		for (VariableField vf : varFlds) {
-			if (!isControlField(fldTag) && subfield != null) {
-				// Data Field
-				DataField dfield = (DataField) vf;
-				resultSet.addAll(dfield.getSubfieldDataAsSet(subfield, beginIx, endIx));
-			} else // Control Field
-			{
-				String cfldData = ((ControlField) vf).getData();
-				if (cfldData.length() >= endIx)
-					resultSet.add(cfldData.substring(beginIx, endIx));
+		List<VariableField> variableFields = record.getVariableFields(fldTag);
+		for (VariableField vf : variableFields) {
+			if (isControlField(fldTag) && subfield != null) {
+				String controlFieldData = ((ControlField) vf).getData();
+				if (controlFieldData.length() >= endIx) {
+					resultSet.add(controlFieldData.substring(beginIx, endIx));
+				}
+			} else  {
+				DataField dataField = (DataField) vf;
+				resultSet.addAll(dataField.getSubfieldDataAsSet(subfield, beginIx, endIx));
 			}
 		}
 		return resultSet;
@@ -187,8 +186,7 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
 	 * @param subfieldsStr
 	 *          - the string containing the desired subfields
 	 * @param separator
-	 *          - the separator string to insert between subfield items (if null,
-	 *          a " " will be used)
+	 *          - the separator string to insert between subfield items (if null, a " " will be used)
 	 * @return a Set of String, where each string is the concatenated contents of
 	 *          all the desired subfield values from a single instance of the
 	 *          fldTag
@@ -210,15 +208,15 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
 
 		// Loop through Data and Control Fields
 		// int iTag = new Integer(fldTag).intValue();
-		List<VariableField> varFlds = record.getVariableFields(fldTag);
-		if (varFlds == null){
+		List<VariableField> variableFields = record.getVariableFields(fldTag);
+		if (variableFields == null){
 			return resultSet;
 		}
-		for (VariableField vf : varFlds) {
+		for (VariableField vf : variableFields) {
 			if (!isControlField(fldTag) && subfieldsStr != null) {
 				// DataField
-				DataField dfield = (DataField) vf;
-				resultSet.addAll(dfield.getSubfieldDataAsSet(subfieldsStr, separator));
+				DataField dataField = (DataField) vf;
+				resultSet.addAll(dataField.getSubfieldDataAsSet(subfieldsStr, separator));
 			} else {
 				// Control Field
 				resultSet.add(((ControlField) vf).getData().trim());
@@ -227,16 +225,11 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
 		return resultSet;
 	}
 
-	private static Pattern controlFieldPattern = Pattern.compile("00[0-9]");
-	private static boolean isControlField(String fieldTag) {
-		return controlFieldPattern.matcher(fieldTag).matches();
-	}
-
 	private static boolean isControlField(int fieldTag) {
 		return fieldTag <= 9;
 	}
 
-	private static HashMap<String, Pattern> subfieldPatterns = new HashMap<>();
+	private static final HashMap<String, Pattern> subfieldPatterns = new HashMap<>();
 	/**
 	 * Given a tag for a field, and a list (or regex) of one or more subfields get
 	 * any linked 880 fields and include the appropriate subfields as a String
@@ -252,8 +245,7 @@ private static boolean isControlField(int fieldTag) {
 	 *          be interpreted as particular bytes, NOT a pattern 100abcd denotes
 	 *          subfields a, b, c, d are desired from the linked 880.
 	 * @param separator
-	 *          - the separator string to insert between subfield items (if null,
-	 *          a " " will be used)
+	 *          - the separator string to insert between subfield items (if null, a " " will be used)
 	 *
 	 * @return set of Strings containing the values of the designated 880
 	 *         field(s)/subfield(s)
@@ -270,10 +262,10 @@ private static Set<String> getLinkedFieldValue(Record record, String tag, String
 			}
 		}
 		List<DataField> fields = record.getDataFields(880);
-		for (DataField dfield : fields) {
-			Subfield link = dfield.getSubfield('6');
+		for (DataField dataField : fields) {
+			Subfield link = dataField.getSubfield('6');
 			if (link != null && link.getData().startsWith(tag)) {
-				List<Subfield> subList = dfield.getSubfields();
+				List<Subfield> subList = dataField.getSubfields();
 				StringBuilder buf = new StringBuilder();
 				for (Subfield subF : subList) {
 					boolean addIt = false;
@@ -332,13 +324,13 @@ public static Set<String> getAllSubfields(Record record, String fieldSpec, Strin
 			String fldTag = fldTag1.substring(0, 3);
 			int fldTagAsInt = Integer.parseInt(fldTag);
 
-			String subfldTags = fldTag1.substring(3);
+			String subfieldTags = fldTag1.substring(3);
 
 			List<DataField> marcFieldList = record.getDataFields(fldTagAsInt);
 			if (!marcFieldList.isEmpty()) {
 				for (DataField marcField : marcFieldList) {
 
-					StringBuilder buffer = getSpecifiedSubfieldsAsString(marcField, subfldTags, separator);
+					StringBuilder buffer = getSpecifiedSubfieldsAsString(marcField, subfieldTags, separator);
 					if (buffer.length() > 0) {
 						result.add(AspenStringUtils.cleanDataForSolr(buffer.toString()));
 					}
@@ -353,7 +345,7 @@ public static StringBuilder getSpecifiedSubfieldsAsString(DataField marcField, S
 		StringBuilder buffer = new StringBuilder();
 		List<Subfield> subFields = marcField.getSubfields();
 		for (Subfield subfield : subFields) {
-			if (validSubfields.length() == 0 || validSubfields.contains("" + subfield.getCode())){
+			if (validSubfields.isEmpty() || validSubfields.contains("" + subfield.getCode())){
 				if (buffer.length() > 0) {
 					buffer.append(separator != null ? separator : " ");
 				}
@@ -382,28 +374,24 @@ public static List<DataField> getDataFields(Record marcRecord, int[] tags) {
 	public static ControlField getControlField(Record marcRecord, String tag){
 		List<ControlField> variableFields = marcRecord.getControlFields(tag);
 		ControlField variableFieldReturn = null;
-		for (Object variableField : variableFields){
-			if (variableField instanceof ControlField){
-				variableFieldReturn = (ControlField)variableField;
-			}
+		for (ControlField variableField : variableFields){
+			variableFieldReturn = variableField;
 		}
 		return variableFieldReturn;
 	}
 
 	public static ControlField getControlField(Record marcRecord, int tag){
 		List<ControlField> variableFields = marcRecord.getControlFields(tag);
 		ControlField variableFieldReturn = null;
-		for (Object variableField : variableFields){
-			if (variableField instanceof ControlField){
-				variableFieldReturn = (ControlField)variableField;
-			}
+		for (ControlField variableField : variableFields){
+			variableFieldReturn = variableField;
 		}
 		return variableFieldReturn;
 	}
 
 	/**
-	 * Loops through all datafields and creates a field for "keywords"
-	 * searching. Shameless stolen from Vufind Indexer Custom Code
+	 * Loops through all data fields and creates a field for "keywords"
+	 * searching. Shameless stolen from VuFind Indexer Custom Code
 	 *
 	 * @param lowerBound
 	 *          - the "lowest" marc field to include (e.g. 100)
@@ -414,12 +402,12 @@ public static ControlField getControlField(Record marcRecord, int tag){
 	 *         range indicated by the bound string arguments.
 	 */
 	public static String getAllSearchableFields(Record record, int lowerBound, int upperBound) {
-		StringBuilder buffer = new StringBuilder("");
+		StringBuilder buffer = new StringBuilder();
 		List<DataField> fields = record.getDataFields();
 		for (DataField field : fields) {
 			// Get all fields starting with the 100 and ending with the 839
 			// This will ignore any "code" fields and only use textual fields
-			int tag = localParseInt(field.getTag(), -1);
+			int tag = localParseInt(field.getTag());
 			if ((tag >= lowerBound) && (tag < upperBound)) {
 				// Loop through subfields
 				List<Subfield> subfields = field.getSubfields();
@@ -439,7 +427,7 @@ public static String getCustomSearchableFields(Record record, String customMarcF
 
 	public static String getFirstFieldVal(Record record, String fieldSpec) {
 		Set<String> result = MarcUtil.getFieldList(record, fieldSpec);
-		if (result.size() == 0){
+		if (result.isEmpty()){
 			return null;
 		}else{
 			return result.iterator().next();
@@ -450,11 +438,9 @@ public static String getFirstFieldVal(Record record, String fieldSpec) {
 	 * return an int for the passed string
 	 *
 	 * @param str The String value of the integer to prompt
-	 * @param defValue
-	 *          - default value, if string doesn't parse into int
 	 */
-	private static int localParseInt(String str, int defValue) {
-		int value = defValue;
+	private static int localParseInt(String str) {
+		int value = -1;
 		try {
 			value = Integer.parseInt(str);
 		} catch (NumberFormatException nfe) {
@@ -464,7 +450,7 @@ private static int localParseInt(String str, int defValue) {
 		return (value);
 	}
 
-	private static Pattern specialCharPattern = Pattern.compile("\\p{C}");
+	private static final Pattern specialCharPattern = Pattern.compile("\\p{C}");
 	public static long getChecksum(Record marcRecord) {
 		CRC32 crc32 = new CRC32();
 		String marcRecordContents = marcRecord.toString();
@@ -489,9 +475,9 @@ public static void outputMarcRecord(Record marcRecord, File individualFile, Logg
 		writer2.close();
 	}
 
-	private static SimpleDateFormat oo8DateFormat = new SimpleDateFormat("yyMMdd");
-	private static SimpleDateFormat oo5DateFormat = new SimpleDateFormat("yyyyMMdd");
-	public synchronized static Long getDateAddedForRecord(Record marcRecord, String recordNumber, String source, File individualFile, Logger logger) {
+	private static final SimpleDateFormat oo8DateFormat = new SimpleDateFormat("yyMMdd");
+	private static final SimpleDateFormat oo5DateFormat = new SimpleDateFormat("yyyyMMdd");
+	public synchronized static Long getDateAddedForRecord(Record marcRecord, File individualFile, Logger logger) {
 		//Set first detection date based on the creation date of the file
 		Long timeAdded = null;
 		if (individualFile.exists()){
@@ -556,15 +542,15 @@ public static Record readMarcRecordFromFile(File marcFile, BaseIndexingLogEntry
 			}
 			marcFileStream.close();
 		}catch (FileNotFoundException fne){
-			//These will now show up in the suppression so we don't need to add them to notes.
+			//These will now show up in the suppression, so we don't need to add them to notes.
 			//logEntry.addNote("Could not find marcFile " + marcFile.getAbsolutePath());
 			return null;
 		}catch (Exception e){
 			//This happens if the file has too many items. Ignore and read with permissive handler.
 			//logEntry.addNote("Could not read marc file, loading permissive " + marcFile.getAbsolutePath() + e.toString());
 		}
 
-		//If we got here, it didn't read successfully.  Try again using the Permissinve Reader
+		//If we got here, it didn't read successfully.  Try again using the Permissive Reader
 		//The Permissive Reader allows reading large files.
 		return readMarcRecordFromFilePermissive(marcFile, logEntry);
 	}
@@ -597,20 +583,11 @@ public static Record readJsonFormattedRecord(String identifier, String marcConte
 			try{
 				Record marcRecord = streamReader.next();
 				marcFileStream.close();
-				streamReader = null;
 				return marcRecord;
-			}catch (JSONException jse){
-			}catch (JsonParser.Escape jse){
+			}catch (JSONException | JsonParser.Escape | MarcException | NullPointerException jse){
 				logEntry.incInvalidRecords(identifier);
 				logEntry.addNote(jse.getMessage());
-			}catch (MarcException me){
-				logEntry.incInvalidRecords(identifier);
-				logEntry.addNote(me.getMessage());
-			}catch (NullPointerException npe){
-				logEntry.incInvalidRecords(identifier);
-				logEntry.addNote(npe.getMessage());
 			}
-			streamReader = null;
 			marcFileStream.close();
 		}catch (Exception e){
 			logEntry.incErrors("Could not parse marc in json format for " + identifier, e);

diff --git a/code/reindexer/src/com/turning_leaf_technologies/reindexer/IlsRecordProcessor.java b/code/reindexer/src/com/turning_leaf_technologies/reindexer/IlsRecordProcessor.java
@@ -95,8 +95,6 @@ abstract class IlsRecordProcessor extends MarcRecordProcessor {
 				this.suppressRecordsWithUrlsMatching = Pattern.compile(suppressRecordsWithUrlsMatching, Pattern.CASE_INSENSITIVE);
 			}
 
-			includePersonalAndCorporateNamesInTopics = indexingProfileRS.getBoolean("includePersonalAndCorporateNamesInTopics");
-
 			loadHoldsStmt = dbConn.prepareStatement("SELECT ilsId, numHolds from ils_hold_summary where ilsId = ?", ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
 			addTranslationMapValueStmt = dbConn.prepareStatement("INSERT INTO translation_map_values (translationMapId, value, translation) VALUES (?, ?, ?)");
 			updateRecordSuppressionReasonStmt = dbConn.prepareStatement("UPDATE ils_records set suppressed=?, suppressionNotes=? where source=? and ilsId=?");