public class DataSet extends java.lang.Object implements java.io.Serializable, java.lang.Iterable<DataRecord>
Modifier and Type | Field and Description |
---|---|
static org.apache.log4j.Logger |
log
Used for logging.
|
Constructor and Description |
---|
DataSet()
An empty constructor.
|
DataSet(java.util.List<DataRecord> dataRecords)
Generates a data set object, given a
List of DataRecords containing the data. |
Modifier and Type | Method and Description |
---|---|
void |
addDataRecord(DataRecord dataRecord)
Adds a
DataRecord. |
void |
addDataRecord(DataRecord dataRecord,
java.util.HashSet compoundKey)
Adds a
DataRecord. |
void |
append(DataSet dataSet)
Appends a
DataSet to the end of this DataSet, adding rows. |
void |
append(DataSet dataSet,
java.util.HashSet compoundKey)
Appends a
DataSet to the end of this DataSet, adding rows. |
void |
cache(DataSet dataSet)
Caches the data set to the database.
|
void |
clearDataRecords()
Clears all records from the set.
|
void |
closeCachedResultSet()
Closes up the cached result set.
|
void |
deleteDataRecord(int sequence)
Deletes the
DataRecord at the given sequence. |
void |
dropDataSetTable()
Drops the table that was used to cache this data set.
|
void |
dumpToLog()
Dumps the contents of the
DataSet to the log. |
void |
ensureHasBeenLoadedFromCache()
Ensures that the set has been loaded from the cache, if it was previously cached.
|
void |
filterDuplicates(java.util.HashSet compoundKey)
Filters duplicates from the data set.
|
java.lang.Object |
findValue(java.lang.String valueToFind,
java.lang.String columnToMatch,
java.lang.String columnToReturn)
Finds a value in a data record whose column identified by columnToMatch contains the valueToFind.
|
java.lang.Object |
get(int dataRecordNumber,
java.lang.String identifier)
Gets a single item of data from a data record.
|
java.util.List<DataRecordInfo> |
getAllDataRecordInfo()
Retrieves all the DataRecordInfo objects containing the offsets at which the matches in the data records were found.
|
java.util.List<DataRecord> |
getAllDataRecords()
Returns all of the
DataRecord objects as an ArrayList of DataRecords. |
java.lang.String |
getCharacterSet()
Gets the character encoding of the content given by the server.
|
DataRecord |
getDataRecord(int dataRecordNumber)
Gets a
DataRecord containing data extracted from a single application of an ExtractorPattern. |
DataRecordInfo |
getDataRecordInfo(int i)
Retrieves the DataRecordInfo in the i-th position.
|
java.util.Iterator |
getDataRecordIterator()
Gets an
Iterator that can be used to cycle through all of the DataRecords in the set. |
java.lang.Object |
getFirstValueForKey(java.lang.String key)
Gets the first non-null value matching the given key.
|
DataRecord |
getNextDataRecordFromCache()
Gets the next
DataRecord from the current result set. |
int |
getNumDataRecords()
Returns the number of data records held by this object.
|
boolean |
hasBeenCached()
Indicates whether or not the data set has already been cached once.
|
java.util.Iterator<DataRecord> |
iterator() |
void |
join(DataSet dataSet)
Joins a
DataSet to this DataSet, potentially adding columns. |
void |
keepFields(java.util.Set<java.lang.String> fieldsToKeep)
Removes fields from the data set that are not contained in the supplied set.
|
void |
loadFromCache()
Re-populate the data set from the data in the cache.
|
void |
removeDataRecordFields()
Removes any fields identified by the DATARECORD constant.
|
void |
removeDATARECORDs()
Removes the DATARECORD entry, if found.
|
void |
removeField(java.lang.String field)
Removes a specific field from every data record in the set.
|
void |
setCharacterSet(java.lang.String characterSet)
Sets the character encoding of the content given by the server.
|
void |
setDataRecordInfo(java.util.List<DataRecordInfo> dataRecordInfo)
Sets the DataRecordInfo for the DataSet, which contains the offsets where the text was found.
|
void |
setHasBeenLoadedFromCache(boolean hasBeenLoadedFromCache)
Sets whether or not the data set has been loaded from the cache.
|
int |
size()
Same as getNumDataRecords.
|
java.lang.String |
toString() |
void |
writeToFile(java.lang.String fileToSaveTo)
Writes the data set to the file specified by the string.
|
public DataSet()
public DataSet(@NotNull java.util.List<DataRecord> dataRecords)
Generates a data set object, given a List of DataRecords containing the data.
Parameters:
dataRecords - The data.
@InternalOnly public void setDataRecordInfo(java.util.List<DataRecordInfo> dataRecordInfo)
Parameters:
dataRecordInfo - The info to set.
@InternalOnly public java.util.List<DataRecordInfo> getAllDataRecordInfo()
@InternalOnly public DataRecordInfo getDataRecordInfo(int i)
Parameters:
i - The index (zero based) of the record info to get.
@InternalOnly public boolean hasBeenCached()
@InternalOnly public void setHasBeenLoadedFromCache(boolean hasBeenLoadedFromCache)
Parameters:
hasBeenLoadedFromCache - A boolean.
@InternalOnly public void cache(DataSet dataSet)
Parameters:
dataSet - The DataSet that should be included with this one. We write it to the database only if this isn't the first time we've cached this.
@InternalOnly public void ensureHasBeenLoadedFromCache()
@InternalOnly public void dropDataSetTable()
@InternalOnly public void loadFromCache()
public void join(@NotNull DataSet dataSet)
Joins a DataSet to this DataSet, potentially adding columns. Note that if columns with duplicate names exist between the sets, the columns in the original will be overwritten.
Parameters:
dataSet - The DataSet to join.
public void append(DataSet dataSet)
Appends a DataSet to the end of this DataSet, adding rows.
Parameters:
dataSet - The DataSet to join.
public void append(@Nullable DataSet dataSet, @Nullable java.util.HashSet compoundKey)
Appends a DataSet to the end of this DataSet, adding rows.
Parameters:
compoundKey - The column names to filter on, if records are to be filtered. If this is null, records will not be filtered.
dataSet - The DataSet to join.
public void addDataRecord(DataRecord dataRecord)
Adds a DataRecord.
Parameters:
dataRecord - The DataRecord to add.
public void addDataRecord(DataRecord dataRecord, @Nullable java.util.HashSet compoundKey)
Adds a DataRecord.
Parameters:
compoundKey - The column names to filter on, if records are to be filtered. If this is null, records will not be filtered.
dataRecord - The DataRecord to add.
public void filterDuplicates(@Nullable java.util.HashSet compoundKey)
Parameters:
compoundKey - The column names to filter on, if records are to be filtered. If this is null, records will not be filtered.
@NotNull public java.util.List<DataRecord> getAllDataRecords()
Returns all of the DataRecord objects as an ArrayList of DataRecords.
@NotNull public java.util.Iterator getDataRecordIterator()
Gets an Iterator that can be used to cycle through all of the DataRecords in the set.
Returns:
An Iterator.
public DataRecord getDataRecord(int dataRecordNumber)
Gets a DataRecord containing data extracted from a single application of an ExtractorPattern.
Parameters:
dataRecordNumber - The number of the data record.
public int size()
public int getNumDataRecords()
public void clearDataRecords()
@Nullable public java.lang.Object get(int dataRecordNumber, @NotNull java.lang.String identifier)
Parameters:
dataRecordNumber - The number of the data record to get the datum from.
identifier - The String used to identify the item of data.
public void removeDataRecordFields()
public void dumpToLog()
Dumps the contents of the DataSet to the log.
public void deleteDataRecord(int sequence)
Deletes the DataRecord at the given sequence.
Parameters:
sequence - The sequence.
public void writeToFile(@Nullable java.lang.String fileToSaveTo) throws java.io.IOException
Parameters:
fileToSaveTo - The file to save to.
Throws:
java.io.IOException - On write error.
public void removeDATARECORDs()
@Nullable public java.lang.String getCharacterSet()
public void setCharacterSet(java.lang.String characterSet)
Parameters:
characterSet - The character encoding.
public void closeCachedResultSet()
@Nullable public DataRecord getNextDataRecordFromCache()
Gets the next DataRecord from the current result set.
Returns:
A DataRecord.
@Nullable public java.lang.Object findValue(@NotNull java.lang.String valueToFind, @NotNull java.lang.String columnToMatch, @NotNull java.lang.String columnToReturn)
Parameters:
valueToFind - The value to look for.
columnToMatch - The column that may contain the valueToFind.
columnToReturn - The column containing the value to return.
@Nullable public java.lang.Object getFirstValueForKey(@NotNull java.lang.String key)
Parameters:
key - The key to search for.
public void removeField(@NotNull java.lang.String field)
Parameters:
field - The name of the field to be removed.
public void keepFields(@NotNull java.util.Set<java.lang.String> fieldsToKeep)
Parameters:
fieldsToKeep - A Set of field identifiers.
@NotNull public java.lang.String toString()
Overrides:
toString in class java.lang.Object
@NotNull public java.util.Iterator<DataRecord> iterator()
Specified by:
iterator in interface java.lang.Iterable<DataRecord>