Line data Source code
1 : // EnergyPlus, Copyright (c) 1996-2023, The Board of Trustees of the University of Illinois,
2 : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
3 : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
4 : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
5 : // contributors. All rights reserved.
6 : //
7 : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
8 : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
9 : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
10 : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
11 : // derivative works, and perform publicly and display publicly, and to permit others to do so.
12 : //
13 : // Redistribution and use in source and binary forms, with or without modification, are permitted
14 : // provided that the following conditions are met:
15 : //
16 : // (1) Redistributions of source code must retain the above copyright notice, this list of
17 : // conditions and the following disclaimer.
18 : //
19 : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
20 : // conditions and the following disclaimer in the documentation and/or other materials
21 : // provided with the distribution.
22 : //
23 : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
24 : // the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
25 : // used to endorse or promote products derived from this software without specific prior
26 : // written permission.
27 : //
28 : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
29 : // without changes from the version obtained under this License, or (ii) Licensee makes a
30 : // reference solely to the software portion of its product, Licensee must refer to the
31 : // software as "EnergyPlus version X" software, where "X" is the version number Licensee
32 : // obtained under this License and may not use a different name for the software. Except as
33 : // specifically required in this Section (4), Licensee shall not use in a company name, a
34 : // product name, in advertising, publicity, or other promotional activities any name, trade
35 : // name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
36 : // similar designation, without the U.S. Department of Energy's prior written consent.
37 : //
38 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
39 : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
40 : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
41 : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
43 : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
44 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
46 : // POSSIBILITY OF SUCH DAMAGE.
47 :
48 : #include <EnergyPlus/InputProcessing/CsvParser.hh>
49 : #include <fast_float/fast_float.h>
50 : #include <fmt/format.h>
51 : #include <milo/dtoa.h>
52 : #include <milo/itoa.h>
53 :
54 : using json = nlohmann::json;
55 :
56 0 : std::vector<std::string> const &CsvParser::errors()
57 : {
58 0 : return errors_;
59 : }
60 :
61 0 : std::vector<std::string> const &CsvParser::warnings()
62 : {
63 0 : return warnings_;
64 : }
65 :
66 0 : bool CsvParser::hasErrors()
67 : {
68 0 : return !errors_.empty();
69 : }
70 :
71 10 : json CsvParser::decode(std::string_view csv, char user_delimiter, int user_rows_to_skip)
72 : {
73 10 : bool success = true;
74 10 : return decode(csv, csv.size(), success, user_delimiter, user_rows_to_skip);
75 : }
76 :
77 0 : json CsvParser::decode(std::string_view csv, bool &success, char user_delimiter, int user_rows_to_skip)
78 : {
79 0 : return decode(csv, csv.size(), success, user_delimiter, user_rows_to_skip);
80 : }
81 :
82 0 : json CsvParser::decode(std::string_view csv, size_t _csv_size, char user_delimiter, int user_rows_to_skip)
83 : {
84 0 : bool success = true;
85 0 : return decode(csv, _csv_size, success, user_delimiter, user_rows_to_skip);
86 : }
87 :
88 10 : json CsvParser::decode(std::string_view csv, size_t _csv_size, bool &success, char _delimiter, int _rows_to_skip)
89 : {
90 10 : if (csv.empty()) {
91 0 : success = false;
92 0 : return nullptr;
93 : }
94 :
95 10 : success = true;
96 10 : cur_line_num = 1;
97 10 : index_into_cur_line = 0;
98 10 : beginning_of_line_index = 0;
99 10 : delimiter = _delimiter;
100 10 : rows_to_skip = _rows_to_skip;
101 10 : csv_size = _csv_size;
102 :
103 10 : size_t index = 0;
104 10 : return parse_csv(csv, index, success);
105 : }
106 :
107 0 : std::string CsvParser::encode(json const &root)
108 : {
109 0 : std::string encoded;
110 0 : if (csv_size > 0) {
111 0 : encoded.reserve(csv_size);
112 : } else {
113 0 : encoded.reserve(root["header"].size() * 8760 * 2 * 3);
114 : }
115 :
116 0 : return encoded;
117 : }
118 :
119 1 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
120 : {
121 : Token token;
122 1 : int rows_skipped = 0;
123 : while (true) {
124 545 : token = next_token(csv, index);
125 273 : if (token == Token::FILE_END) {
126 0 : break;
127 273 : } else if (token == Token::LINE_END) {
128 4 : ++rows_skipped;
129 4 : if (rows_skipped == rows_to_skip) {
130 1 : break;
131 : }
132 : }
133 : }
134 1 : }
135 :
136 10 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
137 : {
138 : Token token;
139 10 : int num_columns = 0;
140 :
141 10 : size_t save_index = index;
142 10 : size_t save_line_num = cur_line_num;
143 10 : size_t save_line_index = index_into_cur_line;
144 10 : size_t save_beginning_of_line_index = beginning_of_line_index;
145 :
146 : while (true) {
147 14130 : token = next_token(csv, save_index);
148 7070 : if (token == Token::FILE_END) {
149 0 : break;
150 7070 : } else if (token == Token::DELIMITER) {
151 303 : ++num_columns;
152 6767 : } else if (token == Token::LINE_END) {
153 10 : ++num_columns;
154 10 : break;
155 : }
156 : }
157 :
158 10 : cur_line_num = save_line_num;
159 10 : index_into_cur_line = save_line_index;
160 10 : beginning_of_line_index = save_beginning_of_line_index;
161 :
162 10 : return num_columns;
163 : }
164 :
165 10 : json CsvParser::parse_csv(std::string_view csv, size_t &index, bool &success)
166 : {
167 10 : json root = {{"header", json::array()}, {"values", json::array()}};
168 10 : bool check_first_row = true;
169 10 : bool has_header = (rows_to_skip == 1);
170 :
171 10 : if (csv_size > 3) {
172 : // UTF-8 Byte Order Mark
173 10 : if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
174 0 : index += 3;
175 0 : index_into_cur_line += 3;
176 : }
177 : }
178 :
179 10 : if (rows_to_skip > 1) {
180 1 : skip_rows(csv, index);
181 : }
182 :
183 10 : json &header = root["header"];
184 10 : json &columns = root["values"];
185 : while (true) {
186 271580 : if (index == csv_size) {
187 10 : break;
188 : } else {
189 271570 : if (check_first_row) {
190 10 : int num_columns = find_number_columns(csv, index);
191 10 : check_first_row = !check_first_row;
192 :
193 323 : for (int i = 0; i < num_columns; ++i) {
194 626 : auto arr = std::vector<json>();
195 313 : arr.reserve(8764 * 4);
196 313 : columns.push_back(std::move(arr));
197 : }
198 :
199 10 : if (has_header) {
200 9 : parse_header(csv, index, success, header);
201 : }
202 10 : continue;
203 : }
204 :
205 271560 : parse_line(csv, index, columns);
206 271560 : if (!success) {
207 0 : auto found_index = csv.find_first_of('\n', beginning_of_line_index);
208 0 : std::string line;
209 0 : if (found_index != std::string::npos) {
210 0 : line = csv.substr(beginning_of_line_index, found_index - beginning_of_line_index);
211 : }
212 0 : errors_.emplace_back(fmt::format("Line: {} Index: {} - Parsing Error. Error in following line.", cur_line_num, index_into_cur_line));
213 0 : errors_.emplace_back(fmt::format("~~~ {}", line));
214 0 : success = false;
215 0 : continue;
216 : }
217 : }
218 271570 : }
219 :
220 10 : return root;
221 : }
222 :
223 612 : void CsvParser::parse_header(std::string_view csv, size_t &index, bool &success, json &header)
224 : {
225 : Token token;
226 :
227 : while (true) {
228 1215 : token = look_ahead(csv, index);
229 612 : if (token == Token::LINE_END || token == Token::FILE_END) {
230 9 : next_token(csv, index);
231 9 : return;
232 603 : } else if (token == Token::DELIMITER) {
233 298 : next_token(csv, index);
234 : } else {
235 305 : header.push_back(parse_value(csv, index));
236 305 : if (!success) return;
237 : }
238 : }
239 : }
240 :
241 271560 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
242 : {
243 : Token token;
244 271560 : int column_num = 0;
245 :
246 : while (true) {
247 44369400 : token = look_ahead(csv, index);
248 22320480 : if (token == Token::LINE_END || token == Token::FILE_END) {
249 271560 : next_token(csv, index);
250 271560 : return;
251 22048920 : } else if (token == Token::DELIMITER) {
252 10923720 : next_token(csv, index);
253 10923720 : ++column_num;
254 : } else {
255 11125200 : columns.at(column_num).push_back(parse_value(csv, index));
256 : // if (!success) return;
257 : }
258 : }
259 : }
260 :
261 : // json CsvParser::parse_value(std::string_view csv, size_t &index, bool &success)
262 : //{
263 : // Token token;
264 : // token = look_ahead(csv, index);
265 : //
266 : // switch (token) {
267 : // case Token::STRING: {
268 : // return parse_string(csv, index);
269 : // }
270 : // case Token::Num: {
271 : // return parse_number(csv, index);
272 : // }
273 : // case Token::FILE_END:
274 : // case Token::LINE_END:
275 : // case Token::DELIMITER:
276 : // default:
277 : // break;
278 : // }
279 : // success = false;
280 : // return nullptr;
281 : //}
282 :
283 11125505 : json CsvParser::parse_value(std::string_view csv, size_t &index)
284 : {
285 11125505 : eat_whitespace(csv, index);
286 :
287 11125505 : size_t save_i = index;
288 :
289 : while (true) {
290 121194113 : if (save_i == csv_size) {
291 0 : break;
292 : }
293 :
294 121194113 : char const c = csv[save_i];
295 121194113 : if (c == delimiter || c == '\n' || c == '\r') {
296 : break;
297 : }
298 110068608 : ++save_i;
299 110068608 : }
300 :
301 11125505 : auto diff = save_i - index;
302 11125505 : auto value = csv.substr(index, diff);
303 11125505 : index_into_cur_line += diff;
304 11125505 : index = save_i;
305 :
306 11125505 : size_t plus_sign = 0;
307 11125505 : if (value.front() == '+') {
308 0 : plus_sign = 1;
309 : }
310 :
311 11125505 : auto const value_end = value.data() + value.size(); // have to do this for MSVC
312 :
313 : double val;
314 11125505 : auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val);
315 11125505 : if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
316 305 : return rtrim(value);
317 11125200 : } else if (result.ptr != value_end) {
318 297840 : auto const initial_ptr = result.ptr;
319 367920 : while (delimiter != ' ' && result.ptr != value_end) {
320 297840 : if (*result.ptr != ' ') {
321 262800 : break;
322 : }
323 35040 : ++result.ptr;
324 : }
325 297840 : if (result.ptr == value_end) {
326 35040 : index -= (value_end - initial_ptr);
327 35040 : index_into_cur_line -= (value_end - initial_ptr);
328 35040 : return val;
329 : }
330 262800 : return rtrim(value);
331 : }
332 : // double integral;
333 : // double fractional = std::modf(val, &integral);
334 : // if (fractional == 0) {
335 : // return static_cast<int>(fractional);
336 : // }
337 10827360 : return val;
338 :
339 : // auto const convert_double = [](std::string_view str) -> json {
340 : // double val;
341 : // auto result = fast_float::from_chars(str.data(), str.data() + str.size(), val);
342 : // if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range || result.ptr != str.end()) {
343 : // return rtrim(str);
344 : // }
345 : // return val;
346 : // };
347 : //
348 : // auto const convert_int = [&convert_double](std::string_view str) -> json {
349 : // int val;
350 : // auto result = std::from_chars(str.data(), str.data() + str.size(), val);
351 : // if (result.ec == std::errc::result_out_of_range) {
352 : // return convert_double(str);
353 : // } else if (result.ec == std::errc::invalid_argument) {
354 : // if (*result.ptr == '.') {
355 : // return convert_double(str);
356 : // } else {
357 : // return rtrim(str);
358 : // }
359 : // } else if (result.ptr != str.end()) {
360 : // if (*result.ptr == '.' || *result.ptr == 'e' || *result.ptr == 'E') {
361 : // return convert_double(str);
362 : // } else {
363 : // return rtrim(str);
364 : // }
365 : // }
366 : // return val;
367 : // };
368 : //
369 : // return convert_int(value);
370 : }
371 :
372 : // std::string CsvParser::parse_string(std::string_view csv, size_t &index)
373 : //{
374 : // eat_whitespace(csv, index);
375 : //
376 : // std::string str;
377 : // char c;
378 : //
379 : // while (true) {
380 : // if (index == csv_size) {
381 : // break;
382 : // }
383 : //
384 : // c = csv[index];
385 : // increment_both_index(index, index_into_cur_line);
386 : // if (c == delimiter || c == '\n') {
387 : // decrement_both_index(index, index_into_cur_line);
388 : // break;
389 : // } else if (c == '\r') {
390 : // continue;
391 : // } else {
392 : // str += c;
393 : // }
394 : // }
395 : //
396 : // return rtrim(str);
397 : //}
398 :
399 22321092 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
400 : {
401 22321092 : size_t save_index = index;
402 22321092 : size_t save_line_num = cur_line_num;
403 22321092 : size_t save_line_index = index_into_cur_line;
404 22321092 : size_t save_beginning_of_line_index = beginning_of_line_index;
405 22321092 : Token token = next_token(csv, save_index);
406 22321092 : cur_line_num = save_line_num;
407 22321092 : index_into_cur_line = save_line_index;
408 22321092 : beginning_of_line_index = save_beginning_of_line_index;
409 22321092 : return token;
410 : }
411 :
412 33524022 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
413 : {
414 33524022 : eat_whitespace(csv, index);
415 :
416 33524022 : if (index == csv_size) {
417 0 : return Token::FILE_END;
418 : }
419 :
420 33524022 : char const c = csv[index];
421 33524022 : if (c == delimiter) {
422 21848360 : increment_both_index(index, index_into_cur_line);
423 21848360 : return Token::DELIMITER;
424 11675662 : } else if (c == '\n') {
425 543152 : increment_both_index(index, cur_line_num);
426 543152 : beginning_of_line_index = index;
427 543152 : index_into_cur_line = 0;
428 543152 : return Token::LINE_END;
429 : }
430 11132510 : increment_both_index(index, index_into_cur_line);
431 11132510 : return Token::VALUE;
432 : }
433 :
434 263105 : std::string_view CsvParser::rtrim(std::string_view str)
435 : {
436 : static constexpr std::string_view whitespace(" \t", 2);
437 263105 : if (str.empty()) {
438 0 : return str;
439 : }
440 263105 : auto const index = str.find_last_not_of(whitespace);
441 263105 : if (index == std::string::npos) {
442 0 : str.remove_suffix(str.size());
443 0 : return str;
444 263105 : } else if (index + 1 < str.length()) {
445 2 : return str.substr(0, index + 1);
446 : }
447 263103 : return str;
448 : }
449 :
450 34120066 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
451 : {
452 34120066 : index++;
453 34120066 : line_index++;
454 34120066 : }
455 :
456 0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
457 : {
458 0 : index--;
459 0 : line_index--;
460 0 : }
461 :
462 45245571 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
463 : {
464 45841615 : while (index < csv_size) {
465 45841615 : if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
466 596044 : increment_both_index(index, index_into_cur_line);
467 596044 : continue;
468 : } else {
469 44649527 : return;
470 : }
471 : }
472 : }
|