#include <iostream>
#include <fstream>
#include <string.h>
#include <string>
#include <list>
#include <map>
#include <File.h>
#include <stdio.h>
#include <Directory.h>
#include <Path.h>
#include <NodeInfo.h>
#include <time.h>
#include <vector>
#include <algorithm>
#include "libpq++.h"
#include "utils.h"
Compounds | |
struct | type_handler_t |
Functions | |
PgDatabase | data ("dbname=beindexed host=127.0.0.1") |
int | do_query (const char * query ) |
Perform a query using the global PgDatabase instance. | |
void | init_stop_words () |
Set up g_stop_words, a list of 'stop words'. More... | |
bool | is_stop_word ( string word ) |
Determine if string is a 'stop word', a very common word that it makes no sense to include in the database, like 'the' and 'or'. More... | |
void | text_plain_index_file ( const char * filename, map<string,int> & wordlist ) |
Index a text/plain-file. | |
string | get_file_type ( const char * filename ) |
Get the MIME-type for a given file. More... | |
void | init_type_handlers () |
Initialize g_type_handlers, must be called before any call to index_file(), but must only be called once! More... | |
void | index_file ( const char * filename, map<string,int> & wordlist ) |
Check the file's type and call the corresponding handler. More... | |
int | get_file_id ( string path ) |
Query the database for the fileTable.id corresponding to path. | |
int | get_word_id ( string word ) |
Query the database for the wordTable.id corresponding to word. | |
bool | is_handler_present ( const char * path ) |
Checks if a handler that can handle the file's type is present. More... | |
bool | should_ignore ( const char * path, bool just_attr=false ) |
Checks if a file should be ignored and not included in the database. More... | |
void | add_words_to_wordtable ( map<string,int> & wordlist ) |
Make sure that all words in list are present in wordTable. More... | |
string | get_modification_date ( const char * path ) |
Get the filesystem modification date for a file. | |
void | add_dir_to_database ( const char * ) |
Iterates through the files in a directory, adding them all to the database. | |
void | add_file_to_database ( const char * path ) |
Indexes a file and adds the result to the database. More... | |
int | main (int numArg, char ** argv) |
Plain ol' main(). More... | |
Variables | |
vector<string> | g_stop_words |
A list of 'stop words', populated by init_stop_words(). | |
const char* | _stop_words [] |
vector<type_handler_t> | g_type_handlers |
Vector containing a list of current type handlers, use init_type_handlers() to set it up before calling index_file(). |
|
Iterates through the files in a directory, adding them all to the database.
|
|
Indexes a file and adds the result to the database.
|
|
Make sure that all words in list are present in wordTable.
|
|
|
|
Perform a query using the global PgDatabase instance.
|
|
Query the database for the fileTable.id corresponding to path.
|
|
Get the MIME-type for a given file. In a future version this will throw an error if the operation fails. |
|
Get the filesystem modification date for a file.
|
|
Query the database for the wordTable.id corresponding to word.
|
|
Check the file's type and call the corresponding handler. In a future version this will throw an error if no handler is found or if an error occurs. Don't forget to call init_type_handlers() once at the start of the program to set up the type -> handler relationships (and load any present add-ons). |
|
Set up g_stop_words, a list of 'stop words'. Will eventually get the list of words to ignore from the database so the user can add or remove words as needed. |
|
Initialize g_type_handlers, must be called before any call to index_file(), but must only be called once! Will eventually load add-ons and add them to the list as well to provide an easy way to add support for more formats. |
|
Checks if a handler that can handle the file's type is present.
|
|
Determine if a string is a 'stop word': a very common word, like 'the' or 'or', that makes no sense to include in the database. init_stop_words() must be called before using this function. |
|
Plain ol' main(). Performs some initialization, then adds something to the database. |
|
Checks if a file should be ignored and not included in the database. This is accomplished by checking a number of things: (1) whether the attribute BeIndexed:Rule is set to 'ignore'. The remaining checks are only performed if just_attr is false: (2) whether the file is a symlink; (3) whether there is an appropriate handler for the file (unless check_handler is false).
|
|
Index a text/plain-file.
|
|
Initializer: { "the", "of", "and", "to", "in", "is", "you", "that", "it", "he", "for", "was", "on", "are", "as", "with", "his", "they", "at", "be", "this", "from", "have", "or", "by", "one", "had", "not", "but", "what", "all", "were", "when", "we", "there", "can", "an", "your", "which", "their", "said", "if", "do", "will", "each", "about", "how", "up", "out", "them", "then", "she", "many", "some", "so", "these", "would", "other", "into", "has", "more", "her", "two", "like", "him", "see", "time", "could", "no", "make", "than", "first", "been", "its", "who", "now", "people", "my", "made", "over", "did", "down", "only", "way", "find", "use", "may", "water", "long", "little", "very", "after", "words", "called", "just", "where", "most", "know", "get", "through", "back", "much", "before", "go", "good", "new", "write", "our", "och", "att", "det", "som", "en", "på", "av", "är", "för", "med", "till", "den", "har", "de", "inte", "om", "ett", "han", "men", "var", "jag", "sig", "vi", "" } |
|
A list of 'stop words', populated by init_stop_words().
|
|
Vector containing a list of current type handlers, use init_type_handlers() to set it up before calling index_file().
|