presage 0.9.1
tokenizer.h
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#ifndef PRESAGE_TOKENIZER
26#define PRESAGE_TOKENIZER
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <iostream>
33#include <istream>
34#include <string>
35#include <assert.h>
36
/**
 * Abstract base class for tokenizers operating on an input stream.
 *
 * The class holds a reference to the stream being tokenized together
 * with the sets of blankspace and separator characters, and declares
 * the token iteration interface that concrete tokenizers implement.
 *
 * The referenced stream is not owned: it must outlive the tokenizer.
 */
class Tokenizer {
public:
    /**
     * @param stream      stream to tokenize (kept by reference, see the
     *                    `stream` member)
     * @param blankspaces characters to be treated as blankspaces
     * @param separators  characters to be treated as separators
     *
     * Defined out of line; presumably initialises the corresponding
     * members -- see tokenizer.cpp.
     */
    Tokenizer(std::istream& stream,
              const std::string blankspaces,
              const std::string separators );
    virtual ~Tokenizer();

    /** Token count; whether this means remaining or total tokens is
     *  decided by the concrete tokenizer. */
    virtual int countTokens() = 0;

    /** Tells whether a further call to nextToken() can yield a token. */
    virtual bool hasMoreTokens() const = 0;

    /** Extracts and returns the next token. */
    virtual std::string nextToken() = 0;

    /** Progress indicator; scale and meaning are implementation
     *  defined by the concrete tokenizer. */
    virtual double progress() const = 0;


    /** Setter for the blankspace character set. */
    void blankspaceChars(const std::string);
    /** Getter for the blankspace character set. */
    std::string blankspaceChars() const;

    /** Setter for the separator character set. */
    void separatorChars(const std::string);
    /** Getter for the separator character set. */
    std::string separatorChars() const;

    /** Setter for the lowercase mode flag. */
    void lowercaseMode(const bool);
    /** Getter for the lowercase mode flag. */
    bool lowercaseMode() const;

    /**
     * Returns the characters of the stream lying in the offset range
     * [offbeg, offend) as a string.
     *
     * The get position found on entry is sought back to before
     * returning. Error flags are cleared before every character is
     * peeked, so the stream state flags are not preserved.
     */
    std::string streamToString() const {
        // remember where the caller left the get pointer
        std::streamoff offbackup = stream.tellg();
        std::string str;
        std::streamoff curroff = offbeg;
        stream.seekg(curroff);
        while (curroff < offend) {
            // drop any error flag so that peek() is attempted
            stream.clear();
            // peek() yields an int; it is narrowed to char as-is
            str.push_back(static_cast<char>(stream.peek()));
            curroff++;
            stream.seekg(curroff);
        }
        stream.seekg(offbackup);
        return str;
    }

protected:
    /**
     * Scope guard for a stream's get position and state flags.
     *
     * On construction the current state flags and get position of the
     * stream are recorded and the get pointer is moved to the given
     * offset. On destruction the recorded position and state flags are
     * put back.
     */
    class StreamGuard {
    public:
        /**
         * @param so stream to guard
         * @param of offset the get pointer is moved to for the
         *           lifetime of the guard
         */
        StreamGuard(std::istream& so, std::streamoff& of)
            : guardedStream(so) {
            currstate = guardedStream.rdstate();
            curroff   = guardedStream.tellg();
            guardedStream.seekg (of );
        }

        ~StreamGuard() {
            // Error flags raised while the guard was active (typically
            // eofbit/failbit after reading up to the end of the stream)
            // would turn seekg() into a no-op, so drop them first.
            guardedStream.clear();
            guardedStream.seekg (curroff );
            // clear(state) replaces the flags with the recorded ones;
            // setstate() would merely OR them into the current flags
            // and could never reset a flag set in the guarded scope.
            guardedStream.clear(currstate);
        }

    private:
        std::istream&     guardedStream; ///< stream being guarded
        std::ios::iostate currstate;     ///< state flags recorded at construction
        std::streamoff    curroff;       ///< get position recorded at construction
    };

    std::istream&     stream;  ///< stream being tokenized (not owned)
    std::ios::iostate sstate;  ///< stream state flags; maintained outside this header
    std::streamoff    offbeg;  ///< offset of the first character of the tokenized range
    std::streamoff    offend;  ///< offset one past the last character of the tokenized range
    std::streamoff    offset;  ///< presumably the tokenizer's current offset -- confirm in tokenizer.cpp

    /** Presumably tests membership in the blankspace set; defined in tokenizer.cpp. */
    bool isBlankspace(const int character) const;
    /** Presumably tests membership in the separator set; defined in tokenizer.cpp. */
    bool isSeparator (const int character) const;

private:
    std::string blankspaces;  ///< blankspace character set
    std::string separators;   ///< separator character set

    bool lowercase;           ///< lowercase mode flag
};
159
160#endif // PRESAGE_TOKENIZER
std::ios::iostate currstate
Definition tokenizer.h:140
std::istream & guardedStream
Definition tokenizer.h:139
std::streamoff curroff
Definition tokenizer.h:141
StreamGuard(std::istream &so, std::streamoff &of)
Definition tokenizer.h:127
virtual std::string nextToken()=0
std::istream & stream
Definition tokenizer.h:144
std::string separatorChars() const
Definition tokenizer.cpp:76
std::streamoff offend
Definition tokenizer.h:147
std::streamoff offbeg
Definition tokenizer.h:146
virtual ~Tokenizer()
Definition tokenizer.cpp:53
bool isSeparator(const int character) const
virtual int countTokens()=0
std::string streamToString() const
Definition tokenizer.h:109
std::string blankspaces
Definition tokenizer.h:154
std::streamoff offset
Definition tokenizer.h:148
virtual double progress() const =0
bool lowercase
Definition tokenizer.h:157
bool lowercaseMode() const
Definition tokenizer.cpp:86
std::ios::iostate sstate
Definition tokenizer.h:145
std::string separators
Definition tokenizer.h:155
bool isBlankspace(const int character) const
Definition tokenizer.cpp:91
Tokenizer(std::istream &stream, const std::string blankspaces, const std::string separators)
Definition tokenizer.cpp:27
std::string blankspaceChars() const
Definition tokenizer.cpp:66
virtual bool hasMoreTokens() const =0