|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||
java.lang.Object | +--iglu.ir.FileSearchEngine
This is an implementation of the SearchEngine that uses the FileBTree. It does not implement the entire API. Right now it is mostly useful for storing document vectors and IDs, and retrieving them using searches. Although the complete API is not implemented yet, what is implemented is pretty robust. I've used it a lot.
| Field Summary | |
FileBTree |
documentFile
|
(package private) java.util.HashMap |
invEntries
|
FileBTree |
invIndex
|
(package private) boolean |
keepDocs
|
static int |
keyLength
|
(package private) int |
numDocs
|
static int |
termLength
|
FileBTree |
vectorFile
|
| Constructor Summary | |
FileSearchEngine(java.lang.String fname)
|
|
FileSearchEngine(java.lang.String fname,
boolean keepDocs)
|
|
| Method Summary | |
void |
addDocument(java.io.Serializable docId,
java.io.Serializable docData,
TermVector docVector)
Add a vector to the collection. |
void |
close()
|
boolean |
delete(java.io.Serializable docId)
|
boolean |
docExists(java.io.Serializable docId)
Returns true if a document with that ID is already in the database. |
java.util.Iterator |
docIterator()
|
boolean |
equals(java.lang.Object o)
Indicates whether an object is equal to this SearchEngine |
protected void |
flushAll()
|
private void |
flushInvEntries()
|
java.lang.String |
getDescription()
Returns a textual description of this information source. |
java.io.Serializable |
getDocData(java.io.Serializable docId)
Returns the document data associated with docId. |
java.lang.String |
getMetricName()
Returns the name of the similarity metric used by this class. |
java.lang.String |
getName()
Returns the name of this particular source. |
long |
getNumDocuments()
|
double |
getSimilarityScore(TermVector vector1,
TermVector vector2)
Returns the similarity of the two vectors based on the metric indicated by getMetricName(). |
TermVector |
getVector(java.io.Serializable docId)
Get the vector for the given document. |
java.util.Iterator |
iterator()
Returns an iterator over the document identifiers. |
static void |
main(java.lang.String[] argv)
|
ValueSortedMap |
retrieveDocuments(TermVector vector,
int numSimilar)
Return a list of document identifiers with documents similar to the given vector, sorted by similarity. |
void |
setDescription(java.lang.String description)
Sets the description of this particular search engine |
void |
setDocData(java.io.Serializable docId,
java.io.Serializable docData)
Sets the document's data. |
void |
setName(java.lang.String name)
Sets the name of this particular source. |
void |
setVector(java.io.Serializable docId,
TermVector docVector)
Change the vector for docId to the given vector. |
static TermVector |
stringToTV(java.lang.String s)
|
| Methods inherited from class java.lang.Object |
clone, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
public FileBTree invIndex
public FileBTree vectorFile
public FileBTree documentFile
public static final int termLength
public static final int keyLength
java.util.HashMap invEntries
int numDocs
boolean keepDocs
| Constructor Detail |
public FileSearchEngine(java.lang.String fname)
public FileSearchEngine(java.lang.String fname,
boolean keepDocs)
| Method Detail |
public long getNumDocuments()
public void addDocument(java.io.Serializable docId,
java.io.Serializable docData,
TermVector docVector)
SearchEngine
addDocument in interface SearchEngine
private void flushInvEntries()
public boolean docExists(java.io.Serializable docId)
SearchEngine
docExists in interface SearchEngine
public boolean equals(java.lang.Object o)
SearchEngine
equals in interface SearchEngine
equals in class java.lang.Object
public java.lang.String getDescription()
SearchEngine
getDescription in interface SearchEngine
public java.io.Serializable getDocData(java.io.Serializable docId)
SearchEngine
getDocData in interface SearchEngine
public java.lang.String getMetricName()
SearchEngine
getMetricName in interface SearchEngine
public java.lang.String getName()
SearchEngine
getName in interface SearchEngine
public double getSimilarityScore(TermVector vector1,
TermVector vector2)
SearchEngine
Returns the similarity of the two vectors based on the metric indicated by getMetricName().
getSimilarityScore in interface SearchEngine
public TermVector getVector(java.io.Serializable docId)
SearchEngine
getVector in interface SearchEngine
public java.util.Iterator iterator()
SearchEngine
iterator in interface SearchEngine
public java.util.Iterator docIterator()
public ValueSortedMap retrieveDocuments(TermVector vector,
int numSimilar)
SearchEngine
retrieveDocuments in interface SearchEngine
numSimilar - The maximum number of documents to return. If 0, return all documents.
public void setDescription(java.lang.String description)
SearchEngine
setDescription in interface SearchEngine
public void setDocData(java.io.Serializable docId,
java.io.Serializable docData)
SearchEngine
setDocData in interface SearchEngine
public void setName(java.lang.String name)
SearchEngine
setName in interface SearchEngine
public void setVector(java.io.Serializable docId,
TermVector docVector)
SearchEngine
setVector in interface SearchEngine
protected void flushAll()
public boolean delete(java.io.Serializable docId)
public void close()
public static TermVector stringToTV(java.lang.String s)
public static void main(java.lang.String[] argv)
|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||