Line data Source code
1 : // EnergyPlus, Copyright (c) 1996-2024, The Board of Trustees of the University of Illinois,
2 : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
3 : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
4 : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
5 : // contributors. All rights reserved.
6 : //
7 : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
8 : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
9 : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
10 : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
11 : // derivative works, and perform publicly and display publicly, and to permit others to do so.
12 : //
13 : // Redistribution and use in source and binary forms, with or without modification, are permitted
14 : // provided that the following conditions are met:
15 : //
16 : // (1) Redistributions of source code must retain the above copyright notice, this list of
17 : // conditions and the following disclaimer.
18 : //
19 : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
20 : // conditions and the following disclaimer in the documentation and/or other materials
21 : // provided with the distribution.
22 : //
23 : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
24 : // the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
25 : // used to endorse or promote products derived from this software without specific prior
26 : // written permission.
27 : //
28 : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
29 : // without changes from the version obtained under this License, or (ii) Licensee makes a
30 : // reference solely to the software portion of its product, Licensee must refer to the
31 : // software as "EnergyPlus version X" software, where "X" is the version number Licensee
32 : // obtained under this License and may not use a different name for the software. Except as
33 : // specifically required in this Section (4), Licensee shall not use in a company name, a
34 : // product name, in advertising, publicity, or other promotional activities any name, trade
35 : // name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
36 : // similar designation, without the U.S. Department of Energy's prior written consent.
37 : //
38 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
39 : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
40 : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
41 : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
43 : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
44 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
46 : // POSSIBILITY OF SUCH DAMAGE.
47 :
48 : #include <EnergyPlus/InputProcessing/CsvParser.hh>
49 : #include <cstddef>
50 : #include <fast_float/fast_float.h>
51 : #include <fmt/format.h>
52 : #include <milo/dtoa.h>
53 : #include <milo/itoa.h>
54 :
55 : using json = nlohmann::json;
56 :
57 0 : std::vector<std::pair<std::string, bool>> const &CsvParser::errors()
58 : {
59 0 : return errors_;
60 : }
61 :
62 10 : bool CsvParser::hasErrors()
63 : {
64 10 : return !errors_.empty();
65 : }
66 :
67 10 : json CsvParser::decode(std::string_view csv, char t_delimiter, int t_rows_to_skip)
68 : {
69 10 : if (csv.empty()) {
70 0 : errors_.emplace_back("CSV File is empty", false);
71 0 : success = false;
72 0 : return nullptr;
73 : }
74 :
75 10 : success = true;
76 10 : cur_line_num = 1;
77 10 : index_into_cur_line = 0;
78 10 : beginning_of_line_index = 0;
79 10 : delimiter = t_delimiter;
80 10 : rows_to_skip = t_rows_to_skip;
81 10 : csv_size = csv.size();
82 :
83 10 : size_t index = 0;
84 10 : return parse_csv(csv, index);
85 : }
86 :
87 1 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
88 : {
89 : Token token;
90 1 : int rows_skipped = 0;
91 : while (true) {
92 273 : token = next_token(csv, index);
93 273 : if (token == Token::FILE_END) {
94 0 : break;
95 273 : } else if (token == Token::LINE_END) {
96 4 : ++rows_skipped;
97 4 : if (rows_skipped == rows_to_skip) {
98 1 : break;
99 : }
100 : }
101 : }
102 1 : }
103 :
104 10 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
105 : {
106 : Token token;
107 : Token prev_token;
108 10 : int num_columns = 0;
109 :
110 10 : size_t save_index = index;
111 10 : size_t save_line_num = cur_line_num;
112 10 : size_t save_line_index = index_into_cur_line;
113 10 : size_t save_beginning_of_line_index = beginning_of_line_index;
114 :
115 : while (true) {
116 3375 : token = next_token(csv, save_index);
117 3375 : if (token == Token::FILE_END) {
118 0 : break;
119 3375 : } else if (token == Token::DELIMITER) {
120 303 : ++num_columns;
121 3072 : } else if (token == Token::LINE_END) {
122 : // Catch a trailing comma, such as Shading files from E+ 22.2.0 and below
123 10 : if (prev_token != Token::DELIMITER) {
124 8 : ++num_columns;
125 : }
126 10 : break;
127 : }
128 3365 : prev_token = token;
129 : }
130 :
131 10 : cur_line_num = save_line_num;
132 10 : index_into_cur_line = save_line_index;
133 10 : beginning_of_line_index = save_beginning_of_line_index;
134 :
135 10 : return num_columns;
136 : }
137 :
138 10 : json CsvParser::parse_csv(std::string_view csv, size_t &index)
139 : {
140 100 : json root = {{"header", json::array()}, {"values", json::array()}};
141 10 : bool check_first_row = true;
142 10 : bool has_header = (rows_to_skip == 1);
143 :
144 10 : constexpr size_t reservedSize = 8764 * 4;
145 :
146 10 : if (csv_size > 3) {
147 : // UTF-8 Byte Order Mark
148 10 : if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
149 0 : index += 3;
150 0 : index_into_cur_line += 3;
151 : }
152 : }
153 :
154 10 : if (rows_to_skip > 1) {
155 1 : skip_rows(csv, index);
156 : }
157 :
158 10 : json &header = root["header"];
159 10 : json &columns = root["values"];
160 : while (true) {
161 271580 : if (index == csv_size) {
162 10 : break;
163 : } else {
164 271570 : if (check_first_row) {
165 : // Parse the header first, it could have an extra '()' for shading in 22.2.0 and below
166 10 : if (has_header) {
167 9 : parse_header(csv, index, header);
168 : }
169 10 : int num_columns = find_number_columns(csv, index);
170 10 : check_first_row = false;
171 :
172 321 : for (int i = 0; i < num_columns; ++i) {
173 311 : auto arr = std::vector<json>(); // (THIS_AUTO_OK)
174 311 : arr.reserve(reservedSize);
175 311 : columns.push_back(std::move(arr));
176 311 : }
177 :
178 10 : continue;
179 10 : }
180 :
181 271560 : parse_line(csv, index, columns);
182 271560 : if (!success) {
183 0 : break; // Bail early
184 : }
185 : }
186 271570 : }
187 :
188 10 : return root;
189 0 : }
190 :
191 612 : void CsvParser::parse_header(std::string_view csv, size_t &index, json &header)
192 : {
193 : Token token;
194 :
195 : while (true) {
196 612 : token = look_ahead(csv, index);
197 612 : if (token == Token::LINE_END || token == Token::FILE_END) {
198 9 : next_token(csv, index);
199 9 : return;
200 603 : } else if (token == Token::DELIMITER) {
201 298 : next_token(csv, index);
202 : } else {
203 305 : header.push_back(parse_value(csv, index));
204 : }
205 : }
206 : }
207 :
208 271560 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
209 : {
210 : Token token;
211 271560 : size_t column_num = 0;
212 271560 : size_t parsed_values = 0;
213 271560 : const size_t num_columns = columns.size(); // Csv isn't empty, so we know it's at least 1
214 :
215 271560 : size_t this_cur_line_num = cur_line_num;
216 271560 : size_t this_beginning_of_line_index = beginning_of_line_index;
217 :
218 : while (true) {
219 22320480 : token = look_ahead(csv, index);
220 22320480 : if (token == Token::LINE_END || token == Token::FILE_END) {
221 271560 : if (parsed_values != num_columns) {
222 0 : success = false;
223 :
224 0 : size_t found_index = csv.find_first_of("\r\n", this_beginning_of_line_index);
225 0 : std::string line;
226 0 : if (found_index != std::string::npos) {
227 0 : line = csv.substr(this_beginning_of_line_index, found_index - this_beginning_of_line_index);
228 : }
229 0 : errors_.emplace_back(
230 0 : fmt::format(
231 : "CsvParser - Line {} - Expected {} columns, got {}. Error in following line.", this_cur_line_num, num_columns, parsed_values),
232 0 : false);
233 0 : errors_.emplace_back(line, true);
234 0 : }
235 271560 : next_token(csv, index);
236 271560 : return;
237 22048920 : } else if (token == Token::DELIMITER) {
238 10923720 : next_token(csv, index);
239 10923720 : ++column_num;
240 : } else {
241 11125200 : columns.at(column_num).push_back(parse_value(csv, index));
242 11125200 : ++parsed_values;
243 : }
244 22048920 : }
245 : }
246 :
247 11125505 : json CsvParser::parse_value(std::string_view csv, size_t &index)
248 : {
249 11125505 : eat_whitespace(csv, index);
250 :
251 11125505 : size_t save_i = index;
252 :
253 : while (true) {
254 121194113 : if (save_i == csv_size) {
255 0 : break;
256 : }
257 :
258 121194113 : char const c = csv[save_i];
259 121194113 : if (c == delimiter || c == '\n' || c == '\r') {
260 : break;
261 : }
262 110068608 : ++save_i;
263 110068608 : }
264 :
265 11125505 : size_t diff = save_i - index;
266 11125505 : std::string_view value = csv.substr(index, diff);
267 11125505 : index_into_cur_line += diff;
268 11125505 : index = save_i;
269 :
270 11125505 : size_t plus_sign = 0;
271 11125505 : if (value.front() == '+') {
272 0 : plus_sign = 1;
273 : }
274 :
275 11125505 : auto const value_end = value.data() + value.size(); // have to do this for MSVC // (AUTO_OK_ITER)
276 :
277 : double val;
278 11125505 : auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val); // (AUTO_OK_OBJ)
279 11125505 : if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
280 610 : return rtrim(value);
281 11125200 : } else if (result.ptr != value_end) {
282 297840 : auto const initial_ptr = result.ptr; // (THIS_AUTO_OK)
283 332880 : while (delimiter != ' ' && result.ptr != value_end) {
284 297840 : if (*result.ptr != ' ') {
285 262800 : break;
286 : }
287 35040 : ++result.ptr;
288 : }
289 297840 : if (result.ptr == value_end) {
290 35040 : index -= (value_end - initial_ptr);
291 35040 : index_into_cur_line -= (value_end - initial_ptr);
292 35040 : return val;
293 : }
294 525600 : return rtrim(value);
295 : }
296 :
297 10827360 : return val;
298 : }
299 :
300 22321092 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
301 : {
302 22321092 : size_t save_index = index;
303 22321092 : size_t save_line_num = cur_line_num;
304 22321092 : size_t save_line_index = index_into_cur_line;
305 22321092 : size_t save_beginning_of_line_index = beginning_of_line_index;
306 22321092 : Token token = next_token(csv, save_index);
307 22321092 : cur_line_num = save_line_num;
308 22321092 : index_into_cur_line = save_line_index;
309 22321092 : beginning_of_line_index = save_beginning_of_line_index;
310 22321092 : return token;
311 : }
312 :
313 33520327 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
314 : {
315 33520327 : eat_whitespace(csv, index);
316 :
317 33520327 : if (index == csv_size) {
318 0 : return Token::FILE_END;
319 : }
320 :
321 33520327 : char const c = csv[index];
322 33520327 : if (c == delimiter) {
323 21848360 : increment_both_index(index, index_into_cur_line);
324 21848360 : return Token::DELIMITER;
325 11671967 : } else if (c == '\n') {
326 543152 : increment_both_index(index, cur_line_num);
327 543152 : beginning_of_line_index = index;
328 543152 : index_into_cur_line = 0;
329 543152 : return Token::LINE_END;
330 : }
331 11128815 : increment_both_index(index, index_into_cur_line);
332 11128815 : return Token::VALUE;
333 : }
334 :
335 263105 : std::string_view CsvParser::rtrim(std::string_view str)
336 : {
337 : static constexpr std::string_view whitespace(" \t", 2);
338 263105 : if (str.empty()) {
339 0 : return str;
340 : }
341 263105 : size_t const index = str.find_last_not_of(whitespace);
342 263105 : if (index == std::string::npos) {
343 0 : str.remove_suffix(str.size());
344 0 : return str;
345 263105 : } else if (index + 1 < str.length()) {
346 2 : return str.substr(0, index + 1);
347 : }
348 263103 : return str;
349 : }
350 :
351 34116048 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
352 : {
353 34116048 : index++;
354 34116048 : line_index++;
355 34116048 : }
356 :
357 0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
358 : {
359 0 : index--;
360 0 : line_index--;
361 0 : }
362 :
363 44645832 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
364 : {
365 45241553 : while (index < csv_size) {
366 45241553 : if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
367 595721 : increment_both_index(index, index_into_cur_line);
368 595721 : continue;
369 : } else {
370 44645832 : return;
371 : }
372 : }
373 : }
|