001 package org.maltparser.core.syntaxgraph.reader;
002
003 import java.io.File;
004 import java.util.HashMap;
005
006 import org.maltparser.core.config.ConfigurationDir;
007 import org.maltparser.core.exception.MaltChainedException;
008 import org.maltparser.core.flow.FlowChartInstance;
009 import org.maltparser.core.flow.item.ChartItem;
010 import org.maltparser.core.flow.spec.ChartItemSpecification;
011 import org.maltparser.core.helper.Util;
012 import org.maltparser.core.io.dataformat.DataFormatException;
013 import org.maltparser.core.io.dataformat.DataFormatInstance;
014 import org.maltparser.core.io.dataformat.DataFormatManager;
015 import org.maltparser.core.options.OptionManager;
016 import org.maltparser.core.symbol.SymbolTableHandler;
017 import org.maltparser.core.syntaxgraph.TokenStructure;
018
019 public class ReadChartItem extends ChartItem {
020 private String idName;
021 private String inputFormatName;
022 private String inputFileName;
023 private String inputCharSet;
024 private String readerOptions;
025 private int iterations;
026 private Class<? extends SyntaxGraphReader> graphReaderClass;
027
028 private String nullValueStrategy;
029 private String rootLabels;
030
031 private SyntaxGraphReader reader;
032 private String targetName;
033 private String optiongroupName;
034 private DataFormatInstance inputDataFormatInstance;
035 private TokenStructure cachedGraph = null;
036
037 public ReadChartItem() { super(); }
038
039 public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
040 super.initialize(flowChartinstance, chartItemSpecification);
041
042 for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
043 if (key.equals("id")) {
044 idName = chartItemSpecification.getChartItemAttributes().get(key);
045 } else if (key.equals("target")) {
046 targetName = chartItemSpecification.getChartItemAttributes().get(key);
047 } else if (key.equals("optiongroup")) {
048 optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
049 }
050 }
051
052 if (idName == null) {
053 idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
054 } else if (targetName == null) {
055 targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
056 } else if (optiongroupName == null) {
057 optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
058 }
059
060 setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
061 setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
062 setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
063 setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
064 if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
065 setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
066 } else {
067 setIterations(1);
068 }
069 setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
070
071 setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
072 setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString());
073
074
075 initInput(getNullValueStrategy(), getRootLabels());
076 initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
077 }
078
079 public int preprocess(int signal) throws MaltChainedException {
080 return signal;
081 }
082
083 public int process(int signal) throws MaltChainedException {
084 if (cachedGraph == null) {
085 cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
086 }
087 int prevIterationCounter = reader.getIterationCounter();
088 boolean moreInput = reader.readSentence(cachedGraph);
089 // System.out.println(cachedGraph);
090 // System.exit(1);
091 if (!moreInput) {
092 return ChartItem.TERMINATE;
093 } else if (prevIterationCounter < reader.getIterationCounter()) {
094 return ChartItem.NEWITERATION;
095 }
096 return ChartItem.CONTINUE;
097 // return continueNextSentence && moreInput;
098 }
099
100 public int postprocess(int signal) throws MaltChainedException {
101 return signal;
102 }
103
104 public void terminate() throws MaltChainedException {
105 if (reader != null) {
106 reader.close();
107 reader = null;
108 }
109 cachedGraph = null;
110 inputDataFormatInstance = null;
111 }
112
113 public String getInputFormatName() {
114 if (inputFormatName == null) {
115 return "/appdata/dataformat/conllx.xml";
116 }
117 return inputFormatName;
118 }
119
120 public void setInputFormatName(String inputFormatName) {
121 this.inputFormatName = inputFormatName;
122 }
123
124 public String getInputFileName() {
125 if (inputFileName == null) {
126 return "/dev/stdin";
127 }
128 return inputFileName;
129 }
130
131 public void setInputFileName(String inputFileName) {
132 this.inputFileName = inputFileName;
133 }
134
135 public String getInputCharSet() {
136 if (inputCharSet == null) {
137 return "UTF-8";
138 }
139 return inputCharSet;
140 }
141
142 public void setInputCharSet(String inputCharSet) {
143 this.inputCharSet = inputCharSet;
144 }
145
146 public String getReaderOptions() {
147 if (readerOptions == null) {
148 return "";
149 }
150 return readerOptions;
151 }
152
153 public void setReaderOptions(String readerOptions) {
154 this.readerOptions = readerOptions;
155 }
156
157
158 public int getIterations() {
159 return iterations;
160 }
161
162 public void setIterations(int iterations) {
163 this.iterations = iterations;
164 }
165
166 public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
167 return graphReaderClass;
168 }
169
170 public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
171 try {
172 if (graphReaderClass != null) {
173 this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
174 }
175 } catch (ClassCastException e) {
176 throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
177 }
178 }
179
180 public String getNullValueStrategy() {
181 if (nullValueStrategy == null) {
182 return "one";
183 }
184 return nullValueStrategy;
185 }
186
187 public void setNullValueStrategy(String nullValueStrategy) {
188 this.nullValueStrategy = nullValueStrategy;
189 }
190
191 public String getRootLabels() {
192 if (nullValueStrategy == null) {
193 return "ROOT";
194 }
195 return rootLabels;
196 }
197
198 public void setRootLabels(String rootLabels) {
199 this.rootLabels = rootLabels;
200 }
201
202
203 public String getTargetName() {
204 return targetName;
205 }
206
207 public void setTargetName(String targetName) {
208 this.targetName = targetName;
209 }
210
211 public SyntaxGraphReader getReader() {
212 return reader;
213 }
214
215 public DataFormatInstance getInputDataFormatInstance() {
216 return inputDataFormatInstance;
217 }
218
219 public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException {
220 ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
221 DataFormatManager dataFormatManager = configDir.getDataFormatManager();
222 // DataFormatManager dataFormatManager = flowChartinstance.getDataFormatManager();
223 SymbolTableHandler symbolTables = configDir.getSymbolTables();
224 // SymbolTableHandler symbolTables = flowChartinstance.getSymbolTables();
225 HashMap<String, DataFormatInstance> dataFormatInstances = configDir.getDataFormatInstances();
226 // HashMap<String, DataFormatInstance> dataFormatInstances = flowChartinstance.getDataFormatInstances();
227
228 inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy, rootLabels);
229 if (!dataFormatInstances.containsKey(dataFormatManager.getInputDataFormatSpec().getDataFormatName())) {
230 dataFormatInstances.put(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
231 }
232 }
233
234 public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
235 try {
236 reader = syntaxGraphReader.newInstance();
237 if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
238 reader.open(System.in, inputCharSet);
239 } else if (new File(inputFile).exists()) {
240 reader.setNIterations(iterations);
241 reader.open(inputFile, inputCharSet);
242 } else {
243 reader.setNIterations(iterations);
244 reader.open(Util.findURL(inputFile), inputCharSet);
245 }
246 reader.setDataFormatInstance(inputDataFormatInstance);
247 reader.setOptions(readerOptions);
248 } catch (InstantiationException e) {
249 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
250 } catch (IllegalAccessException e) {
251 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
252 }
253 }
254
255 public boolean equals(Object obj) {
256 if (this == obj)
257 return true;
258 if (obj == null)
259 return false;
260 if (getClass() != obj.getClass())
261 return false;
262 return obj.toString().equals(this.toString());
263 }
264
265 public int hashCode() {
266 return 217 + (null == toString() ? 0 : toString().hashCode());
267 }
268
269 public String toString() {
270 final StringBuilder sb = new StringBuilder();
271 sb.append(" read ");
272 sb.append("id:");sb.append(idName);
273 sb.append(' ');
274 sb.append("target:");
275 sb.append(targetName);
276 sb.append(' ');
277 sb.append("optiongroup:");
278 sb.append(optiongroupName);
279 return sb.toString();
280 }
281 }