LCOV - lcov.output.filtered - EnergyPlus/InputProcessing/CsvParser.cc

LCOV - code coverage report

Current view:	top level - EnergyPlus/InputProcessing - CsvParser.cc (source / functions)		Coverage	Total	Hit
Test:	lcov.output.filtered	Lines:	84.8 %	197	167
Test Date:	2025-06-02 07:23:51	Functions:	86.7 %	15	13

            Line data    Source code

       1              : // EnergyPlus, Copyright (c) 1996-2025, The Board of Trustees of the University of Illinois,
       2              : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
       3              : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
       4              : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
       5              : // contributors. All rights reserved.
       6              : //
       7              : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
       8              : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
       9              : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
      10              : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
      11              : // derivative works, and perform publicly and display publicly, and to permit others to do so.
      12              : //
      13              : // Redistribution and use in source and binary forms, with or without modification, are permitted
      14              : // provided that the following conditions are met:
      15              : //
      16              : // (1) Redistributions of source code must retain the above copyright notice, this list of
      17              : //     conditions and the following disclaimer.
      18              : //
      19              : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
      20              : //     conditions and the following disclaimer in the documentation and/or other materials
      21              : //     provided with the distribution.
      22              : //
      23              : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
      24              : //     the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
      25              : //     used to endorse or promote products derived from this software without specific prior
      26              : //     written permission.
      27              : //
      28              : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
      29              : //     without changes from the version obtained under this License, or (ii) Licensee makes a
      30              : //     reference solely to the software portion of its product, Licensee must refer to the
      31              : //     software as "EnergyPlus version X" software, where "X" is the version number Licensee
      32              : //     obtained under this License and may not use a different name for the software. Except as
      33              : //     specifically required in this Section (4), Licensee shall not use in a company name, a
      34              : //     product name, in advertising, publicity, or other promotional activities any name, trade
      35              : //     name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
      36              : //     similar designation, without the U.S. Department of Energy's prior written consent.
      37              : //
      38              : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
      39              : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      40              : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
      41              : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      42              : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      43              : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      44              : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      45              : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      46              : // POSSIBILITY OF SUCH DAMAGE.
      47              : 
      48              : #include <EnergyPlus/InputProcessing/CsvParser.hh>
      49              : #include <cstddef>
      50              : #include <fast_float/fast_float.h>
      51              : #include <fmt/format.h>
      52              : #include <milo/dtoa.h>
      53              : #include <milo/itoa.h>
      54              : 
      55              : using json = nlohmann::json;
      56              : 
      57            0 : std::vector<std::pair<std::string, bool>> const &CsvParser::errors()
      58              : {
      59            0 :     return errors_;
      60              : }
      61              : 
      62           10 : bool CsvParser::hasErrors()
      63              : {
      64           10 :     return !errors_.empty();
      65              : }
      66              : 
      67           10 : json CsvParser::decode(std::string_view csv, char t_delimiter, int t_rows_to_skip)
      68              : {
      69           10 :     if (csv.empty()) {
      70            0 :         errors_.emplace_back("CSV File is empty", false);
      71            0 :         success = false;
      72            0 :         return nullptr;
      73              :     }
      74              : 
      75           10 :     success = true;
      76           10 :     cur_line_num = 1;
      77           10 :     index_into_cur_line = 0;
      78           10 :     beginning_of_line_index = 0;
      79           10 :     delimiter = t_delimiter;
      80           10 :     rows_to_skip = t_rows_to_skip;
      81           10 :     csv_size = csv.size();
      82              : 
      83           10 :     size_t index = 0;
      84           10 :     return parse_csv(csv, index);
      85              : }
      86              : 
      87            1 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
      88              : {
      89              :     Token token;
      90            1 :     int rows_skipped = 0;
      91              :     while (true) {
      92          273 :         token = next_token(csv, index);
      93          273 :         if (token == Token::FILE_END) {
      94            0 :             break;
      95          273 :         } else if (token == Token::LINE_END) {
      96            4 :             ++rows_skipped;
      97            4 :             if (rows_skipped == rows_to_skip) {
      98            1 :                 break;
      99              :             }
     100              :         }
     101              :     }
     102            1 : }
     103              : 
     104           10 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
     105              : {
     106              :     Token token;
     107              :     Token prev_token;
     108           10 :     int num_columns = 0;
     109              : 
     110           10 :     size_t save_index = index;
     111           10 :     size_t save_line_num = cur_line_num;
     112           10 :     size_t save_line_index = index_into_cur_line;
     113           10 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     114              : 
     115              :     while (true) {
     116         3375 :         token = next_token(csv, save_index);
     117         3375 :         if (token == Token::FILE_END) {
     118            0 :             break;
     119         3375 :         } else if (token == Token::DELIMITER) {
     120          303 :             ++num_columns;
     121         3072 :         } else if (token == Token::LINE_END) {
     122              :             // Catch a trailing comma, such as Shading files from E+ 22.2.0 and below
     123           10 :             if (prev_token != Token::DELIMITER) {
     124            8 :                 ++num_columns;
     125              :             }
     126           10 :             break;
     127              :         }
     128         3365 :         prev_token = token;
     129              :     }
     130              : 
     131           10 :     cur_line_num = save_line_num;
     132           10 :     index_into_cur_line = save_line_index;
     133           10 :     beginning_of_line_index = save_beginning_of_line_index;
     134              : 
     135           10 :     return num_columns;
     136              : }
     137              : 
     138           10 : json CsvParser::parse_csv(std::string_view csv, size_t &index)
     139              : {
     140           90 :     json root = {{"header", json::array()}, {"values", json::array()}};
     141           10 :     bool check_first_row = true;
     142           10 :     bool has_header = (rows_to_skip == 1);
     143              : 
     144           10 :     constexpr size_t reservedSize = 8764 * 4;
     145              : 
     146           10 :     if (csv_size > 3) {
     147              :         // UTF-8 Byte Order Mark
     148           10 :         if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
     149            0 :             index += 3;
     150            0 :             index_into_cur_line += 3;
     151              :         }
     152              :     }
     153              : 
     154           10 :     if (rows_to_skip > 1) {
     155            1 :         skip_rows(csv, index);
     156              :     }
     157              : 
     158           10 :     json &header = root["header"];
     159           10 :     json &columns = root["values"];
     160              :     while (true) {
     161       271580 :         if (index == csv_size) {
     162           10 :             break;
     163              :         } else {
     164       271570 :             if (check_first_row) {
     165              :                 // Parse the header first, it could have an extra '()' for shading in 22.2.0 and below
     166           10 :                 if (has_header) {
     167            9 :                     parse_header(csv, index, header);
     168              :                 }
     169           10 :                 int num_columns = find_number_columns(csv, index);
     170           10 :                 check_first_row = false;
     171              : 
     172          321 :                 for (int i = 0; i < num_columns; ++i) {
     173          311 :                     auto arr = std::vector<json>(); // (THIS_AUTO_OK)
     174          311 :                     arr.reserve(reservedSize);
     175          311 :                     columns.push_back(std::move(arr));
     176          311 :                 }
     177              : 
     178           10 :                 continue;
     179           10 :             }
     180              : 
     181       271560 :             parse_line(csv, index, columns);
     182       271560 :             if (!success) {
     183            0 :                 break; // Bail early
     184              :             }
     185              :         }
     186       271570 :     }
     187              : 
     188           10 :     return root;
     189           80 : }
     190              : 
     191            9 : void CsvParser::parse_header(std::string_view csv, size_t &index, json &header)
     192              : {
     193              :     Token token;
     194              : 
     195              :     while (true) {
     196          612 :         token = look_ahead(csv, index);
     197          612 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     198            9 :             next_token(csv, index);
     199            9 :             return;
     200          603 :         } else if (token == Token::DELIMITER) {
     201          298 :             next_token(csv, index);
     202              :         } else {
     203          305 :             header.push_back(parse_value(csv, index));
     204              :         }
     205              :     }
     206              : }
     207              : 
     208       271560 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
     209              : {
     210              :     Token token;
     211       271560 :     size_t column_num = 0;
     212       271560 :     size_t parsed_values = 0;
     213       271560 :     const size_t num_columns = columns.size(); // Csv isn't empty, so we know it's at least 1
     214              : 
     215       271560 :     size_t this_cur_line_num = cur_line_num;
     216       271560 :     size_t this_beginning_of_line_index = beginning_of_line_index;
     217              : 
     218              :     while (true) {
     219     22320480 :         token = look_ahead(csv, index);
     220     22320480 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     221       271560 :             if (parsed_values != num_columns) {
     222            0 :                 success = false;
     223              : 
     224            0 :                 size_t found_index = csv.find_first_of("\r\n", this_beginning_of_line_index);
     225            0 :                 std::string line;
     226            0 :                 if (found_index != std::string::npos) {
     227            0 :                     line = csv.substr(this_beginning_of_line_index, found_index - this_beginning_of_line_index);
     228              :                 }
     229            0 :                 errors_.emplace_back(
     230            0 :                     fmt::format(
     231              :                         "CsvParser - Line {} - Expected {} columns, got {}. Error in following line.", this_cur_line_num, num_columns, parsed_values),
     232            0 :                     false);
     233            0 :                 errors_.emplace_back(line, true);
     234            0 :             }
     235       271560 :             next_token(csv, index);
     236       271560 :             return;
     237     22048920 :         } else if (token == Token::DELIMITER) {
     238     10923720 :             next_token(csv, index);
     239     10923720 :             ++column_num;
     240              :         } else {
     241     11125200 :             columns.at(column_num).push_back(parse_value(csv, index));
     242     11125200 :             ++parsed_values;
     243              :         }
     244     22048920 :     }
     245              : }
     246              : 
     247     11125505 : json CsvParser::parse_value(std::string_view csv, size_t &index)
     248              : {
     249     11125505 :     eat_whitespace(csv, index);
     250              : 
     251     11125505 :     size_t save_i = index;
     252              : 
     253              :     while (true) {
     254    121194113 :         if (save_i == csv_size) {
     255            0 :             break;
     256              :         }
     257              : 
     258    121194113 :         char const c = csv[save_i];
     259    121194113 :         if (c == delimiter || c == '\n' || c == '\r') {
     260              :             break;
     261              :         }
     262    110068608 :         ++save_i;
     263    110068608 :     }
     264              : 
     265     11125505 :     size_t diff = save_i - index;
     266     11125505 :     std::string_view value = csv.substr(index, diff);
     267     11125505 :     index_into_cur_line += diff;
     268     11125505 :     index = save_i;
     269              : 
     270     11125505 :     size_t plus_sign = 0;
     271     11125505 :     if (value.front() == '+') {
     272            0 :         plus_sign = 1;
     273              :     }
     274              : 
     275     11125505 :     auto const value_end = value.data() + value.size(); // have to do this for MSVC // (AUTO_OK_ITER)
     276              : 
     277              :     double val;
     278     11125505 :     auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val); // (AUTO_OK_OBJ)
     279     11125505 :     if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
     280          305 :         return rtrim(value);
     281     11125200 :     } else if (result.ptr != value_end) {
     282       297840 :         auto const initial_ptr = result.ptr; // (THIS_AUTO_OK)
     283       332880 :         while (delimiter != ' ' && result.ptr != value_end) {
     284       297840 :             if (*result.ptr != ' ') {
     285       262800 :                 break;
     286              :             }
     287        35040 :             ++result.ptr;
     288              :         }
     289       297840 :         if (result.ptr == value_end) {
     290        35040 :             index -= (value_end - initial_ptr);
     291        35040 :             index_into_cur_line -= (value_end - initial_ptr);
     292        35040 :             return val;
     293              :         }
     294       262800 :         return rtrim(value);
     295              :     }
     296              : 
     297     10827360 :     return val;
     298              : }
     299              : 
     300     22321092 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
     301              : {
     302     22321092 :     size_t save_index = index;
     303     22321092 :     size_t save_line_num = cur_line_num;
     304     22321092 :     size_t save_line_index = index_into_cur_line;
     305     22321092 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     306     22321092 :     Token token = next_token(csv, save_index);
     307     22321092 :     cur_line_num = save_line_num;
     308     22321092 :     index_into_cur_line = save_line_index;
     309     22321092 :     beginning_of_line_index = save_beginning_of_line_index;
     310     22321092 :     return token;
     311              : }
     312              : 
     313     33520327 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
     314              : {
     315     33520327 :     eat_whitespace(csv, index);
     316              : 
     317     33520327 :     if (index == csv_size) {
     318            0 :         return Token::FILE_END;
     319              :     }
     320              : 
     321     33520327 :     char const c = csv[index];
     322     33520327 :     if (c == delimiter) {
     323     21848360 :         increment_both_index(index, index_into_cur_line);
     324     21848360 :         return Token::DELIMITER;
     325     11671967 :     } else if (c == '\n') {
     326       543152 :         increment_both_index(index, cur_line_num);
     327       543152 :         beginning_of_line_index = index;
     328       543152 :         index_into_cur_line = 0;
     329       543152 :         return Token::LINE_END;
     330              :     }
     331     11128815 :     increment_both_index(index, index_into_cur_line);
     332     11128815 :     return Token::VALUE;
     333              : }
     334              : 
     335       263105 : std::string_view CsvParser::rtrim(std::string_view str)
     336              : {
     337              :     static constexpr std::string_view whitespace(" \t", 2);
     338       263105 :     if (str.empty()) {
     339            0 :         return str;
     340              :     }
     341       263105 :     size_t const index = str.find_last_not_of(whitespace);
     342       263105 :     if (index == std::string::npos) {
     343            0 :         str.remove_suffix(str.size());
     344            0 :         return str;
     345       263105 :     } else if (index + 1 < str.length()) {
     346            2 :         return str.substr(0, index + 1);
     347              :     }
     348       263103 :     return str;
     349              : }
     350              : 
     351     34116048 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
     352              : {
     353     34116048 :     index++;
     354     34116048 :     line_index++;
     355     34116048 : }
     356              : 
     357            0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
     358              : {
     359            0 :     index--;
     360            0 :     line_index--;
     361            0 : }
     362              : 
     363     44645832 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
     364              : {
     365     45241553 :     while (index < csv_size) {
     366     45241553 :         if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
     367       595721 :             increment_both_index(index, index_into_cur_line);
     368       595721 :             continue;
     369              :         } else {
     370     44645832 :             return;
     371              :         }
     372              :     }
     373              : }

Generated by: LCOV version 2.0-1