LCOV - code coverage report
Current view: top level - EnergyPlus/InputProcessing - CsvParser.cc (source / functions) Hit Total Coverage
Test: lcov.output.filtered Lines: 166 197 84.3 %
Date: 2024-08-24 18:31:18 Functions: 13 15 86.7 %

          Line data    Source code
       1             : // EnergyPlus, Copyright (c) 1996-2024, The Board of Trustees of the University of Illinois,
       2             : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
       3             : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
       4             : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
       5             : // contributors. All rights reserved.
       6             : //
       7             : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
       8             : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
       9             : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
      10             : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
      11             : // derivative works, and perform publicly and display publicly, and to permit others to do so.
      12             : //
      13             : // Redistribution and use in source and binary forms, with or without modification, are permitted
      14             : // provided that the following conditions are met:
      15             : //
      16             : // (1) Redistributions of source code must retain the above copyright notice, this list of
      17             : //     conditions and the following disclaimer.
      18             : //
      19             : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
      20             : //     conditions and the following disclaimer in the documentation and/or other materials
      21             : //     provided with the distribution.
      22             : //
      23             : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
      24             : //     the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
      25             : //     used to endorse or promote products derived from this software without specific prior
      26             : //     written permission.
      27             : //
      28             : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
      29             : //     without changes from the version obtained under this License, or (ii) Licensee makes a
      30             : //     reference solely to the software portion of its product, Licensee must refer to the
      31             : //     software as "EnergyPlus version X" software, where "X" is the version number Licensee
      32             : //     obtained under this License and may not use a different name for the software. Except as
      33             : //     specifically required in this Section (4), Licensee shall not use in a company name, a
      34             : //     product name, in advertising, publicity, or other promotional activities any name, trade
      35             : //     name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
      36             : //     similar designation, without the U.S. Department of Energy's prior written consent.
      37             : //
      38             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
      39             : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      40             : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
      41             : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      42             : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      43             : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      44             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      45             : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      46             : // POSSIBILITY OF SUCH DAMAGE.
      47             : 
      48             : #include <EnergyPlus/InputProcessing/CsvParser.hh>
      49             : #include <cstddef>
      50             : #include <fast_float/fast_float.h>
      51             : #include <fmt/format.h>
      52             : #include <milo/dtoa.h>
      53             : #include <milo/itoa.h>
      54             : 
      55             : using json = nlohmann::json;
      56             : 
      57           0 : std::vector<std::pair<std::string, bool>> const &CsvParser::errors()
      58             : {
      59           0 :     return errors_;
      60             : }
      61             : 
      62          10 : bool CsvParser::hasErrors()
      63             : {
      64          10 :     return !errors_.empty();
      65             : }
      66             : 
      67          10 : json CsvParser::decode(std::string_view csv, char t_delimiter, int t_rows_to_skip)
      68             : {
      69          10 :     if (csv.empty()) {
      70           0 :         errors_.emplace_back("CSV File is empty", false);
      71           0 :         success = false;
      72           0 :         return nullptr;
      73             :     }
      74             : 
      75          10 :     success = true;
      76          10 :     cur_line_num = 1;
      77          10 :     index_into_cur_line = 0;
      78          10 :     beginning_of_line_index = 0;
      79          10 :     delimiter = t_delimiter;
      80          10 :     rows_to_skip = t_rows_to_skip;
      81          10 :     csv_size = csv.size();
      82             : 
      83          10 :     size_t index = 0;
      84          10 :     return parse_csv(csv, index);
      85             : }
      86             : 
      87           1 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
      88             : {
      89             :     Token token;
      90           1 :     int rows_skipped = 0;
      91             :     while (true) {
      92         273 :         token = next_token(csv, index);
      93         273 :         if (token == Token::FILE_END) {
      94           0 :             break;
      95         273 :         } else if (token == Token::LINE_END) {
      96           4 :             ++rows_skipped;
      97           4 :             if (rows_skipped == rows_to_skip) {
      98           1 :                 break;
      99             :             }
     100             :         }
     101             :     }
     102           1 : }
     103             : 
     104          10 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
     105             : {
     106             :     Token token;
     107             :     Token prev_token;
     108          10 :     int num_columns = 0;
     109             : 
     110          10 :     size_t save_index = index;
     111          10 :     size_t save_line_num = cur_line_num;
     112          10 :     size_t save_line_index = index_into_cur_line;
     113          10 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     114             : 
     115             :     while (true) {
     116        3375 :         token = next_token(csv, save_index);
     117        3375 :         if (token == Token::FILE_END) {
     118           0 :             break;
     119        3375 :         } else if (token == Token::DELIMITER) {
     120         303 :             ++num_columns;
     121        3072 :         } else if (token == Token::LINE_END) {
     122             :             // Catch a trailing comma, such as Shading files from E+ 22.2.0 and below
     123          10 :             if (prev_token != Token::DELIMITER) {
     124           8 :                 ++num_columns;
     125             :             }
     126          10 :             break;
     127             :         }
     128        3365 :         prev_token = token;
     129             :     }
     130             : 
     131          10 :     cur_line_num = save_line_num;
     132          10 :     index_into_cur_line = save_line_index;
     133          10 :     beginning_of_line_index = save_beginning_of_line_index;
     134             : 
     135          10 :     return num_columns;
     136             : }
     137             : 
     138          10 : json CsvParser::parse_csv(std::string_view csv, size_t &index)
     139             : {
     140         100 :     json root = {{"header", json::array()}, {"values", json::array()}};
     141          10 :     bool check_first_row = true;
     142          10 :     bool has_header = (rows_to_skip == 1);
     143             : 
     144          10 :     constexpr size_t reservedSize = 8764 * 4;
     145             : 
     146          10 :     if (csv_size > 3) {
     147             :         // UTF-8 Byte Order Mark
     148          10 :         if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
     149           0 :             index += 3;
     150           0 :             index_into_cur_line += 3;
     151             :         }
     152             :     }
     153             : 
     154          10 :     if (rows_to_skip > 1) {
     155           1 :         skip_rows(csv, index);
     156             :     }
     157             : 
     158          10 :     json &header = root["header"];
     159          10 :     json &columns = root["values"];
     160             :     while (true) {
     161      271580 :         if (index == csv_size) {
     162          10 :             break;
     163             :         } else {
     164      271570 :             if (check_first_row) {
     165             :                 // Parse the header first, it could have an extra '()' for shading in 22.2.0 and below
     166          10 :                 if (has_header) {
     167           9 :                     parse_header(csv, index, header);
     168             :                 }
     169          10 :                 int num_columns = find_number_columns(csv, index);
     170          10 :                 check_first_row = false;
     171             : 
     172         321 :                 for (int i = 0; i < num_columns; ++i) {
     173         311 :                     auto arr = std::vector<json>(); // (THIS_AUTO_OK)
     174         311 :                     arr.reserve(reservedSize);
     175         311 :                     columns.push_back(std::move(arr));
     176         311 :                 }
     177             : 
     178          10 :                 continue;
     179          10 :             }
     180             : 
     181      271560 :             parse_line(csv, index, columns);
     182      271560 :             if (!success) {
     183           0 :                 break; // Bail early
     184             :             }
     185             :         }
     186      271570 :     }
     187             : 
     188          10 :     return root;
     189           0 : }
     190             : 
     191         612 : void CsvParser::parse_header(std::string_view csv, size_t &index, json &header)
     192             : {
     193             :     Token token;
     194             : 
     195             :     while (true) {
     196         612 :         token = look_ahead(csv, index);
     197         612 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     198           9 :             next_token(csv, index);
     199           9 :             return;
     200         603 :         } else if (token == Token::DELIMITER) {
     201         298 :             next_token(csv, index);
     202             :         } else {
     203         305 :             header.push_back(parse_value(csv, index));
     204             :         }
     205             :     }
     206             : }
     207             : 
     208      271560 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
     209             : {
     210             :     Token token;
     211      271560 :     size_t column_num = 0;
     212      271560 :     size_t parsed_values = 0;
     213      271560 :     const size_t num_columns = columns.size(); // Csv isn't empty, so we know it's at least 1
     214             : 
     215      271560 :     size_t this_cur_line_num = cur_line_num;
     216      271560 :     size_t this_beginning_of_line_index = beginning_of_line_index;
     217             : 
     218             :     while (true) {
     219    22320480 :         token = look_ahead(csv, index);
     220    22320480 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     221      271560 :             if (parsed_values != num_columns) {
     222           0 :                 success = false;
     223             : 
     224           0 :                 size_t found_index = csv.find_first_of("\r\n", this_beginning_of_line_index);
     225           0 :                 std::string line;
     226           0 :                 if (found_index != std::string::npos) {
     227           0 :                     line = csv.substr(this_beginning_of_line_index, found_index - this_beginning_of_line_index);
     228             :                 }
     229           0 :                 errors_.emplace_back(
     230           0 :                     fmt::format(
     231             :                         "CsvParser - Line {} - Expected {} columns, got {}. Error in following line.", this_cur_line_num, num_columns, parsed_values),
     232           0 :                     false);
     233           0 :                 errors_.emplace_back(line, true);
     234           0 :             }
     235      271560 :             next_token(csv, index);
     236      271560 :             return;
     237    22048920 :         } else if (token == Token::DELIMITER) {
     238    10923720 :             next_token(csv, index);
     239    10923720 :             ++column_num;
     240             :         } else {
     241    11125200 :             columns.at(column_num).push_back(parse_value(csv, index));
     242    11125200 :             ++parsed_values;
     243             :         }
     244    22048920 :     }
     245             : }
     246             : 
     247    11125505 : json CsvParser::parse_value(std::string_view csv, size_t &index)
     248             : {
     249    11125505 :     eat_whitespace(csv, index);
     250             : 
     251    11125505 :     size_t save_i = index;
     252             : 
     253             :     while (true) {
     254   121194113 :         if (save_i == csv_size) {
     255           0 :             break;
     256             :         }
     257             : 
     258   121194113 :         char const c = csv[save_i];
     259   121194113 :         if (c == delimiter || c == '\n' || c == '\r') {
     260             :             break;
     261             :         }
     262   110068608 :         ++save_i;
     263   110068608 :     }
     264             : 
     265    11125505 :     size_t diff = save_i - index;
     266    11125505 :     std::string_view value = csv.substr(index, diff);
     267    11125505 :     index_into_cur_line += diff;
     268    11125505 :     index = save_i;
     269             : 
     270    11125505 :     size_t plus_sign = 0;
     271    11125505 :     if (value.front() == '+') {
     272           0 :         plus_sign = 1;
     273             :     }
     274             : 
     275    11125505 :     auto const value_end = value.data() + value.size(); // have to do this for MSVC // (AUTO_OK_ITER)
     276             : 
     277             :     double val;
     278    11125505 :     auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val); // (AUTO_OK_OBJ)
     279    11125505 :     if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
     280         610 :         return rtrim(value);
     281    11125200 :     } else if (result.ptr != value_end) {
     282      297840 :         auto const initial_ptr = result.ptr; // (THIS_AUTO_OK)
     283      332880 :         while (delimiter != ' ' && result.ptr != value_end) {
     284      297840 :             if (*result.ptr != ' ') {
     285      262800 :                 break;
     286             :             }
     287       35040 :             ++result.ptr;
     288             :         }
     289      297840 :         if (result.ptr == value_end) {
     290       35040 :             index -= (value_end - initial_ptr);
     291       35040 :             index_into_cur_line -= (value_end - initial_ptr);
     292       35040 :             return val;
     293             :         }
     294      525600 :         return rtrim(value);
     295             :     }
     296             : 
     297    10827360 :     return val;
     298             : }
     299             : 
     300    22321092 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
     301             : {
     302    22321092 :     size_t save_index = index;
     303    22321092 :     size_t save_line_num = cur_line_num;
     304    22321092 :     size_t save_line_index = index_into_cur_line;
     305    22321092 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     306    22321092 :     Token token = next_token(csv, save_index);
     307    22321092 :     cur_line_num = save_line_num;
     308    22321092 :     index_into_cur_line = save_line_index;
     309    22321092 :     beginning_of_line_index = save_beginning_of_line_index;
     310    22321092 :     return token;
     311             : }
     312             : 
     313    33520327 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
     314             : {
     315    33520327 :     eat_whitespace(csv, index);
     316             : 
     317    33520327 :     if (index == csv_size) {
     318           0 :         return Token::FILE_END;
     319             :     }
     320             : 
     321    33520327 :     char const c = csv[index];
     322    33520327 :     if (c == delimiter) {
     323    21848360 :         increment_both_index(index, index_into_cur_line);
     324    21848360 :         return Token::DELIMITER;
     325    11671967 :     } else if (c == '\n') {
     326      543152 :         increment_both_index(index, cur_line_num);
     327      543152 :         beginning_of_line_index = index;
     328      543152 :         index_into_cur_line = 0;
     329      543152 :         return Token::LINE_END;
     330             :     }
     331    11128815 :     increment_both_index(index, index_into_cur_line);
     332    11128815 :     return Token::VALUE;
     333             : }
     334             : 
     335      263105 : std::string_view CsvParser::rtrim(std::string_view str)
     336             : {
     337             :     static constexpr std::string_view whitespace(" \t", 2);
     338      263105 :     if (str.empty()) {
     339           0 :         return str;
     340             :     }
     341      263105 :     size_t const index = str.find_last_not_of(whitespace);
     342      263105 :     if (index == std::string::npos) {
     343           0 :         str.remove_suffix(str.size());
     344           0 :         return str;
     345      263105 :     } else if (index + 1 < str.length()) {
     346           2 :         return str.substr(0, index + 1);
     347             :     }
     348      263103 :     return str;
     349             : }
     350             : 
     351    34116048 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
     352             : {
     353    34116048 :     index++;
     354    34116048 :     line_index++;
     355    34116048 : }
     356             : 
     357           0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
     358             : {
     359           0 :     index--;
     360           0 :     line_index--;
     361           0 : }
     362             : 
     363    44645832 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
     364             : {
     365    45241553 :     while (index < csv_size) {
     366    45241553 :         if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
     367      595721 :             increment_both_index(index, index_into_cur_line);
     368      595721 :             continue;
     369             :         } else {
     370    44645832 :             return;
     371             :         }
     372             :     }
     373             : }

Generated by: LCOV version 1.14