LCOV - lcov.output.filtered - EnergyPlus/InputProcessing/CsvParser.cc

LCOV - code coverage report

Current view:	top level - EnergyPlus/InputProcessing - CsvParser.cc (source / functions)		Coverage	Total	Hit
Test:	lcov.output.filtered	Lines:	83.8 %	197	165
Test Date:	2025-05-22 16:09:37	Functions:	86.7 %	15	13

            Line data    Source code

       1              : // EnergyPlus, Copyright (c) 1996-2025, The Board of Trustees of the University of Illinois,
       2              : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
       3              : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
       4              : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
       5              : // contributors. All rights reserved.
       6              : //
       7              : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
       8              : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
       9              : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
      10              : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
      11              : // derivative works, and perform publicly and display publicly, and to permit others to do so.
      12              : //
      13              : // Redistribution and use in source and binary forms, with or without modification, are permitted
      14              : // provided that the following conditions are met:
      15              : //
      16              : // (1) Redistributions of source code must retain the above copyright notice, this list of
      17              : //     conditions and the following disclaimer.
      18              : //
      19              : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
      20              : //     conditions and the following disclaimer in the documentation and/or other materials
      21              : //     provided with the distribution.
      22              : //
      23              : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
      24              : //     the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
      25              : //     used to endorse or promote products derived from this software without specific prior
      26              : //     written permission.
      27              : //
      28              : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
      29              : //     without changes from the version obtained under this License, or (ii) Licensee makes a
      30              : //     reference solely to the software portion of its product, Licensee must refer to the
      31              : //     software as "EnergyPlus version X" software, where "X" is the version number Licensee
      32              : //     obtained under this License and may not use a different name for the software. Except as
      33              : //     specifically required in this Section (4), Licensee shall not use in a company name, a
      34              : //     product name, in advertising, publicity, or other promotional activities any name, trade
      35              : //     name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
      36              : //     similar designation, without the U.S. Department of Energy's prior written consent.
      37              : //
      38              : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
      39              : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      40              : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
      41              : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      42              : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      43              : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      44              : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      45              : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      46              : // POSSIBILITY OF SUCH DAMAGE.
      47              : 
      48              : #include <EnergyPlus/InputProcessing/CsvParser.hh>
      49              : #include <cstddef>
      50              : #include <fast_float/fast_float.h>
      51              : #include <fmt/format.h>
      52              : #include <milo/dtoa.h>
      53              : #include <milo/itoa.h>
      54              : 
      55              : using json = nlohmann::json;
      56              : 
      57            1 : std::vector<std::pair<std::string, bool>> const &CsvParser::errors()
      58              : {
      59            1 :     return errors_;
      60              : }
      61              : 
      62            3 : bool CsvParser::hasErrors()
      63              : {
      64            3 :     return !errors_.empty();
      65              : }
      66              : 
      67            3 : json CsvParser::decode(std::string_view csv, char t_delimiter, int t_rows_to_skip)
      68              : {
      69            3 :     if (csv.empty()) {
      70            0 :         errors_.emplace_back("CSV File is empty", false);
      71            0 :         success = false;
      72            0 :         return nullptr;
      73              :     }
      74              : 
      75            3 :     success = true;
      76            3 :     cur_line_num = 1;
      77            3 :     index_into_cur_line = 0;
      78            3 :     beginning_of_line_index = 0;
      79            3 :     delimiter = t_delimiter;
      80            3 :     rows_to_skip = t_rows_to_skip;
      81            3 :     csv_size = csv.size();
      82              : 
      83            3 :     size_t index = 0;
      84            3 :     return parse_csv(csv, index);
      85              : }
      86              : 
      87            0 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
      88              : {
      89              :     Token token;
      90            0 :     int rows_skipped = 0;
      91              :     while (true) {
      92            0 :         token = next_token(csv, index);
      93            0 :         if (token == Token::FILE_END) {
      94            0 :             break;
      95            0 :         } else if (token == Token::LINE_END) {
      96            0 :             ++rows_skipped;
      97            0 :             if (rows_skipped == rows_to_skip) {
      98            0 :                 break;
      99              :             }
     100              :         }
     101              :     }
     102            0 : }
     103              : 
     104            3 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
     105              : {
     106              :     Token token;
     107              :     Token prev_token;
     108            3 :     int num_columns = 0;
     109              : 
     110            3 :     size_t save_index = index;
     111            3 :     size_t save_line_num = cur_line_num;
     112            3 :     size_t save_line_index = index_into_cur_line;
     113            3 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     114              : 
     115              :     while (true) {
     116           51 :         token = next_token(csv, save_index);
     117           51 :         if (token == Token::FILE_END) {
     118            0 :             break;
     119           51 :         } else if (token == Token::DELIMITER) {
     120            5 :             ++num_columns;
     121           46 :         } else if (token == Token::LINE_END) {
     122              :             // Catch a trailing comma, such as Shading files from E+ 22.2.0 and below
     123            3 :             if (prev_token != Token::DELIMITER) {
     124            2 :                 ++num_columns;
     125              :             }
     126            3 :             break;
     127              :         }
     128           48 :         prev_token = token;
     129              :     }
     130              : 
     131            3 :     cur_line_num = save_line_num;
     132            3 :     index_into_cur_line = save_line_index;
     133            3 :     beginning_of_line_index = save_beginning_of_line_index;
     134              : 
     135            3 :     return num_columns;
     136              : }
     137              : 
     138            3 : json CsvParser::parse_csv(std::string_view csv, size_t &index)
     139              : {
     140           27 :     json root = {{"header", json::array()}, {"values", json::array()}};
     141            3 :     bool check_first_row = true;
     142            3 :     bool has_header = (rows_to_skip == 1);
     143              : 
     144            3 :     constexpr size_t reservedSize = 8764 * 4;
     145              : 
     146            3 :     if (csv_size > 3) {
     147              :         // UTF-8 Byte Order Mark
     148            3 :         if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
     149            0 :             index += 3;
     150            0 :             index_into_cur_line += 3;
     151              :         }
     152              :     }
     153              : 
     154            3 :     if (rows_to_skip > 1) {
     155            0 :         skip_rows(csv, index);
     156              :     }
     157              : 
     158            3 :     json &header = root["header"];
     159            3 :     json &columns = root["values"];
     160              :     while (true) {
     161        43807 :         if (index == csv_size) {
     162            2 :             break;
     163              :         } else {
     164        43805 :             if (check_first_row) {
     165              :                 // Parse the header first, it could have an extra '()' for shading in 22.2.0 and below
     166            3 :                 if (has_header) {
     167            3 :                     parse_header(csv, index, header);
     168              :                 }
     169            3 :                 int num_columns = find_number_columns(csv, index);
     170            3 :                 check_first_row = false;
     171              : 
     172           10 :                 for (int i = 0; i < num_columns; ++i) {
     173            7 :                     auto arr = std::vector<json>(); // (THIS_AUTO_OK)
     174            7 :                     arr.reserve(reservedSize);
     175            7 :                     columns.push_back(std::move(arr));
     176            7 :                 }
     177              : 
     178            3 :                 continue;
     179            3 :             }
     180              : 
     181        43802 :             parse_line(csv, index, columns);
     182        43802 :             if (!success) {
     183            1 :                 break; // Bail early
     184              :             }
     185              :         }
     186        43804 :     }
     187              : 
     188            3 :     return root;
     189           24 : }
     190              : 
     191            3 : void CsvParser::parse_header(std::string_view csv, size_t &index, json &header)
     192              : {
     193              :     Token token;
     194              : 
     195              :     while (true) {
     196           16 :         token = look_ahead(csv, index);
     197           16 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     198            3 :             next_token(csv, index);
     199            3 :             return;
     200           13 :         } else if (token == Token::DELIMITER) {
     201            5 :             next_token(csv, index);
     202              :         } else {
     203            8 :             header.push_back(parse_value(csv, index));
     204              :         }
     205              :     }
     206              : }
     207              : 
     208        43802 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
     209              : {
     210              :     Token token;
     211        43802 :     size_t column_num = 0;
     212        43802 :     size_t parsed_values = 0;
     213        43802 :     const size_t num_columns = columns.size(); // Csv isn't empty, so we know it's at least 1
     214              : 
     215        43802 :     size_t this_cur_line_num = cur_line_num;
     216        43802 :     size_t this_beginning_of_line_index = beginning_of_line_index;
     217              : 
     218              :     while (true) {
     219       210251 :         token = look_ahead(csv, index);
     220       210251 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     221        43802 :             if (parsed_values != num_columns) {
     222            1 :                 success = false;
     223              : 
     224            1 :                 size_t found_index = csv.find_first_of("\r\n", this_beginning_of_line_index);
     225            1 :                 std::string line;
     226            1 :                 if (found_index != std::string::npos) {
     227            1 :                     line = csv.substr(this_beginning_of_line_index, found_index - this_beginning_of_line_index);
     228              :                 }
     229            2 :                 errors_.emplace_back(
     230            1 :                     fmt::format(
     231              :                         "CsvParser - Line {} - Expected {} columns, got {}. Error in following line.", this_cur_line_num, num_columns, parsed_values),
     232            1 :                     false);
     233            1 :                 errors_.emplace_back(line, true);
     234            1 :             }
     235        43802 :             next_token(csv, index);
     236        43802 :             return;
     237       166449 :         } else if (token == Token::DELIMITER) {
     238        78844 :             next_token(csv, index);
     239        78844 :             ++column_num;
     240              :         } else {
     241        87605 :             columns.at(column_num).push_back(parse_value(csv, index));
     242        87605 :             ++parsed_values;
     243              :         }
     244       166449 :     }
     245              : }
     246              : 
     247        87613 : json CsvParser::parse_value(std::string_view csv, size_t &index)
     248              : {
     249        87613 :     eat_whitespace(csv, index);
     250              : 
     251        87613 :     size_t save_i = index;
     252              : 
     253              :     while (true) {
     254       963300 :         if (save_i == csv_size) {
     255            0 :             break;
     256              :         }
     257              : 
     258       963300 :         char const c = csv[save_i];
     259       963300 :         if (c == delimiter || c == '\n' || c == '\r') {
     260              :             break;
     261              :         }
     262       875687 :         ++save_i;
     263       875687 :     }
     264              : 
     265        87613 :     size_t diff = save_i - index;
     266        87613 :     std::string_view value = csv.substr(index, diff);
     267        87613 :     index_into_cur_line += diff;
     268        87613 :     index = save_i;
     269              : 
     270        87613 :     size_t plus_sign = 0;
     271        87613 :     if (value.front() == '+') {
     272            0 :         plus_sign = 1;
     273              :     }
     274              : 
     275        87613 :     auto const value_end = value.data() + value.size(); // have to do this for MSVC // (AUTO_OK_ITER)
     276              : 
     277              :     double val;
     278        87613 :     auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val); // (AUTO_OK_OBJ)
     279        87613 :     if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
     280            8 :         return rtrim(value);
     281        87605 :     } else if (result.ptr != value_end) {
     282        43800 :         auto const initial_ptr = result.ptr; // (THIS_AUTO_OK)
     283        43800 :         while (delimiter != ' ' && result.ptr != value_end) {
     284        43800 :             if (*result.ptr != ' ') {
     285        43800 :                 break;
     286              :             }
     287            0 :             ++result.ptr;
     288              :         }
     289        43800 :         if (result.ptr == value_end) {
     290            0 :             index -= (value_end - initial_ptr);
     291            0 :             index_into_cur_line -= (value_end - initial_ptr);
     292            0 :             return val;
     293              :         }
     294        43800 :         return rtrim(value);
     295              :     }
     296              : 
     297        43805 :     return val;
     298              : }
     299              : 
     300       210267 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
     301              : {
     302       210267 :     size_t save_index = index;
     303       210267 :     size_t save_line_num = cur_line_num;
     304       210267 :     size_t save_line_index = index_into_cur_line;
     305       210267 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     306       210267 :     Token token = next_token(csv, save_index);
     307       210267 :     cur_line_num = save_line_num;
     308       210267 :     index_into_cur_line = save_line_index;
     309       210267 :     beginning_of_line_index = save_beginning_of_line_index;
     310       210267 :     return token;
     311              : }
     312              : 
     313       332972 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
     314              : {
     315       332972 :     eat_whitespace(csv, index);
     316              : 
     317       332972 :     if (index == csv_size) {
     318            0 :         return Token::FILE_END;
     319              :     }
     320              : 
     321       332972 :     char const c = csv[index];
     322       332972 :     if (c == delimiter) {
     323       157703 :         increment_both_index(index, index_into_cur_line);
     324       157703 :         return Token::DELIMITER;
     325       175269 :     } else if (c == '\n') {
     326        87613 :         increment_both_index(index, cur_line_num);
     327        87613 :         beginning_of_line_index = index;
     328        87613 :         index_into_cur_line = 0;
     329        87613 :         return Token::LINE_END;
     330              :     }
     331        87656 :     increment_both_index(index, index_into_cur_line);
     332        87656 :     return Token::VALUE;
     333              : }
     334              : 
     335        43808 : std::string_view CsvParser::rtrim(std::string_view str)
     336              : {
     337              :     static constexpr std::string_view whitespace(" \t", 2);
     338        43808 :     if (str.empty()) {
     339            0 :         return str;
     340              :     }
     341        43808 :     size_t const index = str.find_last_not_of(whitespace);
     342        43808 :     if (index == std::string::npos) {
     343            0 :         str.remove_suffix(str.size());
     344            0 :         return str;
     345        43808 :     } else if (index + 1 < str.length()) {
     346            0 :         return str.substr(0, index + 1);
     347              :     }
     348        43808 :     return str;
     349              : }
     350              : 
     351       403055 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
     352              : {
     353       403055 :     index++;
     354       403055 :     line_index++;
     355       403055 : }
     356              : 
     357            0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
     358              : {
     359            0 :     index--;
     360            0 :     line_index--;
     361            0 : }
     362              : 
     363       420585 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
     364              : {
     365       490668 :     while (index < csv_size) {
     366       490668 :         if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
     367        70083 :             increment_both_index(index, index_into_cur_line);
     368        70083 :             continue;
     369              :         } else {
     370       420585 :             return;
     371              :         }
     372              :     }
     373              : }

Generated by: LCOV version 2.0-1