|
25 Years of Programming
An open source source for C, C++, OWL, BASIC, MDB, XLS, DOT, and more... |
Home Projects Up Sitemap Search Blog Forum+Chat About Us Privacy Terms of Use Feedback FAQ Images Services Ads Donate Humor |
|
|
Borland C++ classes for word lists and text substitution. WordList is a Borland C++ 4.0 dictionary class that maintains a sorted list of unique words or phrases, which can be read from a file upon construction (or after), added to during runtime, and optionally written back to the file in the destructor. TextSub stores pairs of words or phrases, to be used as a lookup table for translating one word or phrase to another. |
|
/* wordlist.h 3/02/02
Copyright (C)1997-2002 Steven Whitney.
Published under GNU GPL (General Public License) Version 2, with ABSOLUTELY NO WARRANTY.
A utility class that maintains a sorted list of unique words or phrases, which can be
read from the file upon construction, added to during runtime, and optionally written
back to the file in the destructor.
Uses the Borland BIDS template container classes.
Used in the WTalk.cpp natural language processing chatbot project for the
whennouns, whenadvs, etc lists.
------
Notes:
--A WordList could be used as an interface to an MDB table. Most efficient method would be
to give it a ctor that includes the db name, table name, and field name it deals with.
Then it can create its own permanent DDE channel on the item, to avoid repetitive
channel creation. Could have a BOOL Find(string tofind), that does the search and
reports whether tofind was in the list. For most small WordLists, this is still
prohibitively inefficient.
*/
#ifndef __WORDLIST_H
#define __WORDLIST_H
#include <classlib\arrays.h>
#include "c:\bcs\my.h"
#pragma hdrstop
//////////////////////////////////////////////////////////////////////////////
// A list of unique words or phrases layered on the BIDS
// TSArrayAsVector<string> container. It can be filled from a file and can
// optionally be written back out when the object is destroyed.
class WordList : public TSArrayAsVector<string>
{
public:
// startcount sizes the underlying container; filename and autowrite are
// forwarded to Load().
WordList(int startcount = 10, string filename = "", BOOL autowrite = FALSE);
// Streams the list to the file named by backupfile(sourcefile) when
// writeonexit is set.
~WordList();
friend ostream& operator << (ostream& os, const WordList&); // write all
friend istream& operator >> (istream& is, WordList&); // read all
// array functions
// Add drops an existing entry that compares equal to s, then inserts s.
int Add(const string& s);
// Load empties the list, reads filename when it is non-empty and returns the
// resulting number of items.
uint Load(const string& filename, BOOL autowrite);
// variables
BOOL writeonexit; // whether to auto-write back to sourcefile in dtor
protected:
string sourcefile; // file the list was loaded from
};
//////////////////////////////////////////////////////////////////////////////
#endif // wordlist.h
/* wordlist.cpp 3/2/02
Copyright (C)1997-2002 Steven Whitney.
Published under GNU GPL (General Public License) Version 2, with ABSOLUTELY NO WARRANTY.
*/
#include "c:\bcs\library\filearay.h"
#include "c:\bcs\library\wordlist.h"
//----------------------------------------------------------------------------
// constructor
// Sizes the underlying vector from startcount, then hands filename (which may
// be empty) and autowrite to Load(), which sets writeonexit and sourcefile.
// startcount serves as both the first and the third base-constructor argument
// (initial upper bound and growth delta). Values below 1 are replaced by 1 so
// the delta can never be zero or negative.
// NOTE(review): the BIDS documentation describes a zero delta as "the vector
// cannot grow" - confirm against the classlib version in use.
WordList::WordList(int startcount, string filename, BOOL autowrite)
  : TSArrayAsVector<string>(startcount > 0 ? startcount : 1,
                            0,
                            startcount > 0 ? startcount : 1)
{
  Load(filename,autowrite);
} //constructor
//----------------------------------------------------------------------------
// destructor
// When writeonexit is set, streams the whole list into the file named by
// backupfile(sourcefile). backupfile() is declared elsewhere; presumably it
// preserves the previous copy and returns the name to write to - verify.
WordList::~WordList()
{
  if(!writeonexit)
    return; // nothing to persist
  ofstream out(backupfile(sourcefile).c_str());
  out << *this;
} //destructor
//----------------------------------------------------------------------------
// write: "word or phrase"\n (no count maintained because WordList files get edited manually)
// Every entry is passed through qstring() and written on a line of its own.
// Returns os so that insertions can be chained.
ostream& operator << (ostream& os, const WordList& w)
{
  // uint matches the index type used in WordList::Add and avoids comparing a
  // signed index against the container's item count.
  const uint count = w.GetItemsInContainer();
  for(uint i = 0 ; i < count ; i++)
    os << qstring(w[i]) << '\n'; // plain newline: no forced flush per entry
  return(os);
} //operator << write
//----------------------------------------------------------------------------
// read: whitespace-separated entries, each either a bare word or a quoted
// word/phrase (the project's string extractor handles both forms).
// Keeps reading until extraction fails, i.e. to EOF: 1 wordlist = 1 file.
// Entries go through WordList::Add; none are removed first.
istream& operator >> (istream& is, WordList& w)
{
  string word;
  for(;;)
  {
    is >> word;
    if(!is) // extraction failed: end of input
      break;
    w.Add(word);
  }
  return is;
} //operator >> read
//----------------------------------------------------------------------------
// Replaces the base-class Add so that the list never holds two equal entries.
// If an entry comparing equal to s already exists it is destroyed first, so
// the newest spelling wins; per the original author this lets a caller change
// capitalization, since strings differing only in case compare equal.
// Returns whatever TSArrayAsVector<string>::Add returns for s.
int WordList::Add(const string& s)
{
  const uint count = GetItemsInContainer();
  uint pos = 0;
  // step over every entry that sorts before s
  while(pos < count && (*this)[pos].compare(s) < 0)
    ++pos;
  // pos is now at the first entry not less than s; remove it only if equal
  if(pos < count && (*this)[pos].compare(s) == 0)
    Destroy(pos);
  return TSArrayAsVector<string>::Add(s);
} //Add
//----------------------------------------------------------------------------
// Empties the list and, when filename is non-empty, refills it from that file.
//   filename  - file to read; an empty string just clears the list
//   autowrite - requested value for writeonexit
// Returns the number of items in the list afterwards.
// writeonexit is only set when a source file name is actually known: it stays
// FALSE for an empty filename and also when the resolved name comes back empty,
// so the destructor never writes to an unknown location.
uint WordList::Load(const string& filename, BOOL autowrite)
{
  Flush();
  sourcefile = "";
  writeonexit = FALSE; // it MUST be false if you don't know where it came from
  if(filename.length())
  {
    FileArray FileList; // this makes filename fully qualified in case dir changes
    FileList.AddFile(filename);
    sourcefile = FileList.GetNext();
    // NOTE(review): FileArray is declared elsewhere; this guards the case
    // where it hands back an empty name - confirm when that can happen.
    if(sourcefile.length())
    {
      writeonexit = autowrite;
      ifstream(sourcefile.c_str()) >> *this;
    }
  }
  return GetItemsInContainer();
}
//----------------------------------------------------------------------------
TextSub and TextSubArray allow translating one word or phrase to another.
/* textsub.h 3/2/02
Copyright (C)1993-2000,2002 Steven Whitney.
Published under GNU GPL (General Public License) Version 2, with ABSOLUTELY NO WARRANTY.
Text substitution classes.
Used in the WTalk.cpp natural language processing chatbot project for:
contraction expansion database
context reversal database: (if user says "i", program says "you", etc.)
------
to do:
what to do about duplicates?: he's = he is, he has
currently, it will use the first in the array, even if array has duplicates
*/
#ifndef __TEXTSUB_H
#define __TEXTSUB_H
#include <classlib\arrays.h>
#include "c:\bcs\my.h"
#pragma hdrstop
/////////////////////////////////////////////////////////////////////////////
// for use in search-and-replace.
// used in Talk.cpp for contractions and reversing context from user to program.
// Pairs a search key with the text that replaces it.
class TextSub
{
public:
// Both strings default to empty.
TextSub(string k = "", string r = "");
TextSub(const TextSub& other);
// only the keys matter: equality and ordering both ignore Sub
BOOL operator == (const TextSub& other) const { return(Key == other.Key); }
BOOL operator < (const TextSub& other) const { return(Key < other.Key); }
friend ostream& operator << (ostream& os, const TextSub&); // write
friend istream& operator >> (istream& is, TextSub&); // read
string Key; // search key
string Sub; // replacement
};
//////////////////////////////////////////////////////////////////////////////
// an array of TextSub containing only unique Keys sorted alphabetically.
class TextSubArray : public TSArrayAsVector<TextSub>
{
public:
// filename and autowrite are forwarded to Load().
TextSubArray(string filename = "", BOOL autowrite = FALSE);
// Streams the array to the file named by backupfile(sourcefile) when
// writeonexit is set.
~TextSubArray();
friend ostream& operator << (ostream& os, const TextSubArray&); // write
friend istream& operator >> (istream& is, TextSubArray& s); // read
// array functions
// Find makes easier a search for a key, not for a TextSub. INT_MAX if not found.
// It wraps key in a TextSub with an empty Sub and defers to the base-class Find.
int Find(const string& key) { return(TSArrayAsVector<TextSub>::Find(TextSub(key))); }
// Add drops an existing entry with the same Key, then inserts s.
int Add(const TextSub& s);
// Load empties the array, reads filename when it is non-empty and returns the
// resulting number of items.
uint Load(const string& filename, BOOL autowrite);
BOOL writeonexit; // whether to auto-write back to sourcefile in dtor
protected:
string sourcefile; // file it was loaded from
};
//////////////////////////////////////////////////////////////////////////////
#endif // textsub_h
/* textsub.cpp 3/2/02
Copyright (C)1993-2000,2002 Steven Whitney.
Published under GNU GPL (General Public License) Version 2, with ABSOLUTELY NO WARRANTY.
Classes for text substitution, search and replace.
*/
#include "c:\bcs\library\textsub.h"
#include "c:\bcs\library\filearay.h"
//////////////////////////////////////////////////////////////////////////////
// class TextSub
//---------------------------------------------------------------------------
// constructor
// Stores k as the search key and r as its replacement text.
TextSub::TextSub(string k, string r)
  : Key(k),
    Sub(r)
{
}
//---------------------------------------------------------------------------
// copy constructor
// Duplicates both the key and the replacement of other.
TextSub::TextSub(const TextSub& other)
  : Key(other.Key),
    Sub(other.Sub)
{
}
//----------------------------------------------------------------------------
// write
// Emits the pair as qstring(Key) , qstring(Sub) with a single comma between
// them and no trailing newline. Returns os for chaining.
ostream& operator << (ostream& os, const TextSub& t)
{
  os << qstring(t.Key);
  os << ",";
  os << qstring(t.Sub);
  return os;
}
//----------------------------------------------------------------------------
// read
// Pulls the next two double-quoted fields from the stream: the first becomes
// Key, the second becomes Sub. Anything between the fields (such as the comma
// written by operator <<) is skipped. Returns is; the caller tests the stream
// state to learn whether a full pair was read.
istream& operator >> (istream& is, TextSub& t)
{
  const char quote = '\"';
  is.ignore(MAXINT, quote);   // skip up to and including the key's opening quote
  getline(is, t.Key, quote);  // Key: everything up to the closing quote
  is.ignore(MAXINT, quote);   // skip up to and including the replacement's opening quote
  getline(is, t.Sub, quote);  // Sub: everything up to the closing quote
  return is;
}
//----------------------------------------------------------------------------
// end class TextSub
//////////////////////////////////////////////////////////////////////////////
// class TextSubArray
//---------------------------------------------------------------------------
// constructor
// Builds the base vector with the fixed arguments (50,0,10) and then hands
// filename and autowrite to Load(), which sets writeonexit and sourcefile.
TextSubArray::TextSubArray(string filename, BOOL autowrite)
  : TSArrayAsVector<TextSub>(50,0,10)
{
  this->Load(filename, autowrite);
}
//---------------------------------------------------------------------------
// destructor
// When writeonexit is set, streams the whole array into the file named by
// backupfile(sourcefile). backupfile() is declared elsewhere; presumably it
// preserves the previous copy and returns the name to write to - verify.
TextSubArray::~TextSubArray()
{
  if(!writeonexit)
    return; // nothing to persist
  ofstream out(backupfile(sourcefile).c_str());
  out << *this;
}
//----------------------------------------------------------------------------
// write
// Writes every TextSub in the array, one per line, using the TextSub
// inserter. Returns os so that insertions can be chained.
ostream& operator << (ostream& os, const TextSubArray& c)
{
  // uint matches the index type used in TextSubArray::Add and avoids comparing
  // a signed index against the container's item count.
  const uint count = c.GetItemsInContainer();
  for(uint i = 0 ; i < count ; i++)
    os << c[i] << '\n'; // plain newline: no forced flush per entry
  return(os);
}
//----------------------------------------------------------------------------
// read
// Extracts TextSub pairs until an extraction fails and adds each successfully
// read pair through TextSubArray::Add. Existing entries are not cleared first.
istream& operator >> (istream& is, TextSubArray& tsa)
{
  TextSub entry;
  for(;;)
  {
    is >> entry;
    if(!is) // no complete pair could be read
      break;
    tsa.Add(entry); // add entry to database
  }
  return is;
}
//----------------------------------------------------------------------------
// override array fn to ensure entries are unique.
// a subsequent addition overwrites any previous entry. This could be used to help
// solve duplicate contractions problem: if a contraction is misinterpreted
// and corrected, then the new usage is probably the correct one to use for
// at least a while in that particular conversation.
int TextSubArray::Add(const TextSub& s)
{
for(uint i = 0 ; i < GetItemsInContainer() ; i++)
{
// comparing Key explicitly would be faster, but this ensures TextSub compatibility
if((*this)[i] < s) // skip preceding entries
continue;
if((*this)[i] == s) // found it
Destroy(i);
break; // quit if you found it OR are past where it should have been
}
return(TSArrayAsVector<TextSub>::Add(s));
}
//----------------------------------------------------------------------------
// read from a file
// Empties the array and, when filename is non-empty, refills it from that file.
//   filename  - file to read; an empty string just clears the array
//   autowrite - requested value for writeonexit
// Returns the number of items in the array afterwards.
// writeonexit is only set when a source file name is actually known: it stays
// FALSE for an empty filename and also when the resolved name comes back empty,
// so the destructor never writes to an unknown location.
uint TextSubArray::Load(const string& filename, BOOL autowrite)
{
  Flush();
  sourcefile = "";
  writeonexit = FALSE; // MUST be false if you don't know where it came from
  if(filename.length()) // make filename fully qualified in case dir changes
  {
    FileArray FileList;
    FileList.AddFile(filename);
    sourcefile = FileList.GetNext();
    // NOTE(review): FileArray is declared elsewhere; this guards the case
    // where it hands back an empty name - confirm when that can happen.
    if(sourcefile.length())
    {
      writeonexit = autowrite;
      ifstream(sourcefile.c_str()) >> *this;
    }
  }
  return GetItemsInContainer();
}
//----------------------------------------------------------------------------
// end class TextSubArray
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|