LCOV - code coverage report
Current view: top level - EnergyPlus/InputProcessing - CsvParser.cc (source / functions) Hit Total Coverage
Test: lcov.output.filtered Lines: 156 197 79.2 %
Date: 2023-01-17 19:17:23 Functions: 13 20 65.0 %

          Line data    Source code
       1             : // EnergyPlus, Copyright (c) 1996-2023, The Board of Trustees of the University of Illinois,
       2             : // The Regents of the University of California, through Lawrence Berkeley National Laboratory
       3             : // (subject to receipt of any required approvals from the U.S. Dept. of Energy), Oak Ridge
       4             : // National Laboratory, managed by UT-Battelle, Alliance for Sustainable Energy, LLC, and other
       5             : // contributors. All rights reserved.
       6             : //
       7             : // NOTICE: This Software was developed under funding from the U.S. Department of Energy and the
       8             : // U.S. Government consequently retains certain rights. As such, the U.S. Government has been
       9             : // granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable,
      10             : // worldwide license in the Software to reproduce, distribute copies to the public, prepare
      11             : // derivative works, and perform publicly and display publicly, and to permit others to do so.
      12             : //
      13             : // Redistribution and use in source and binary forms, with or without modification, are permitted
      14             : // provided that the following conditions are met:
      15             : //
      16             : // (1) Redistributions of source code must retain the above copyright notice, this list of
      17             : //     conditions and the following disclaimer.
      18             : //
      19             : // (2) Redistributions in binary form must reproduce the above copyright notice, this list of
      20             : //     conditions and the following disclaimer in the documentation and/or other materials
      21             : //     provided with the distribution.
      22             : //
      23             : // (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory,
      24             : //     the University of Illinois, U.S. Dept. of Energy nor the names of its contributors may be
      25             : //     used to endorse or promote products derived from this software without specific prior
      26             : //     written permission.
      27             : //
      28             : // (4) Use of EnergyPlus(TM) Name. If Licensee (i) distributes the software in stand-alone form
      29             : //     without changes from the version obtained under this License, or (ii) Licensee makes a
      30             : //     reference solely to the software portion of its product, Licensee must refer to the
      31             : //     software as "EnergyPlus version X" software, where "X" is the version number Licensee
      32             : //     obtained under this License and may not use a different name for the software. Except as
      33             : //     specifically required in this Section (4), Licensee shall not use in a company name, a
      34             : //     product name, in advertising, publicity, or other promotional activities any name, trade
      35             : //     name, trademark, logo, or other designation of "EnergyPlus", "E+", "e+" or confusingly
      36             : //     similar designation, without the U.S. Department of Energy's prior written consent.
      37             : //
      38             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
      39             : // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      40             : // AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
      41             : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      42             : // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      43             : // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      44             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      45             : // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      46             : // POSSIBILITY OF SUCH DAMAGE.
      47             : 
      48             : #include <EnergyPlus/InputProcessing/CsvParser.hh>
      49             : #include <fast_float/fast_float.h>
      50             : #include <fmt/format.h>
      51             : #include <milo/dtoa.h>
      52             : #include <milo/itoa.h>
      53             : 
      54             : using json = nlohmann::json;
      55             : 
      56           0 : std::vector<std::string> const &CsvParser::errors()
      57             : {
      58           0 :     return errors_;
      59             : }
      60             : 
      61           0 : std::vector<std::string> const &CsvParser::warnings()
      62             : {
      63           0 :     return warnings_;
      64             : }
      65             : 
      66           0 : bool CsvParser::hasErrors()
      67             : {
      68           0 :     return !errors_.empty();
      69             : }
      70             : 
      71          10 : json CsvParser::decode(std::string_view csv, char user_delimiter, int user_rows_to_skip)
      72             : {
      73          10 :     bool success = true;
      74          10 :     return decode(csv, csv.size(), success, user_delimiter, user_rows_to_skip);
      75             : }
      76             : 
      77           0 : json CsvParser::decode(std::string_view csv, bool &success, char user_delimiter, int user_rows_to_skip)
      78             : {
      79           0 :     return decode(csv, csv.size(), success, user_delimiter, user_rows_to_skip);
      80             : }
      81             : 
      82           0 : json CsvParser::decode(std::string_view csv, size_t _csv_size, char user_delimiter, int user_rows_to_skip)
      83             : {
      84           0 :     bool success = true;
      85           0 :     return decode(csv, _csv_size, success, user_delimiter, user_rows_to_skip);
      86             : }
      87             : 
      88          10 : json CsvParser::decode(std::string_view csv, size_t _csv_size, bool &success, char _delimiter, int _rows_to_skip)
      89             : {
      90          10 :     if (csv.empty()) {
      91           0 :         success = false;
      92           0 :         return nullptr;
      93             :     }
      94             : 
      95          10 :     success = true;
      96          10 :     cur_line_num = 1;
      97          10 :     index_into_cur_line = 0;
      98          10 :     beginning_of_line_index = 0;
      99          10 :     delimiter = _delimiter;
     100          10 :     rows_to_skip = _rows_to_skip;
     101          10 :     csv_size = _csv_size;
     102             : 
     103          10 :     size_t index = 0;
     104          10 :     return parse_csv(csv, index, success);
     105             : }
     106             : 
     107           0 : std::string CsvParser::encode(json const &root)
     108             : {
     109           0 :     std::string encoded;
     110           0 :     if (csv_size > 0) {
     111           0 :         encoded.reserve(csv_size);
     112             :     } else {
     113           0 :         encoded.reserve(root["header"].size() * 8760 * 2 * 3);
     114             :     }
     115             : 
     116           0 :     return encoded;
     117             : }
     118             : 
     119           1 : void CsvParser::skip_rows(std::string_view csv, size_t &index)
     120             : {
     121             :     Token token;
     122           1 :     int rows_skipped = 0;
     123             :     while (true) {
     124         545 :         token = next_token(csv, index);
     125         273 :         if (token == Token::FILE_END) {
     126           0 :             break;
     127         273 :         } else if (token == Token::LINE_END) {
     128           4 :             ++rows_skipped;
     129           4 :             if (rows_skipped == rows_to_skip) {
     130           1 :                 break;
     131             :             }
     132             :         }
     133             :     }
     134           1 : }
     135             : 
     136          10 : int CsvParser::find_number_columns(std::string_view csv, size_t &index)
     137             : {
     138             :     Token token;
     139          10 :     int num_columns = 0;
     140             : 
     141          10 :     size_t save_index = index;
     142          10 :     size_t save_line_num = cur_line_num;
     143          10 :     size_t save_line_index = index_into_cur_line;
     144          10 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     145             : 
     146             :     while (true) {
     147       14130 :         token = next_token(csv, save_index);
     148        7070 :         if (token == Token::FILE_END) {
     149           0 :             break;
     150        7070 :         } else if (token == Token::DELIMITER) {
     151         303 :             ++num_columns;
     152        6767 :         } else if (token == Token::LINE_END) {
     153          10 :             ++num_columns;
     154          10 :             break;
     155             :         }
     156             :     }
     157             : 
     158          10 :     cur_line_num = save_line_num;
     159          10 :     index_into_cur_line = save_line_index;
     160          10 :     beginning_of_line_index = save_beginning_of_line_index;
     161             : 
     162          10 :     return num_columns;
     163             : }
     164             : 
     165          10 : json CsvParser::parse_csv(std::string_view csv, size_t &index, bool &success)
     166             : {
     167          10 :     json root = {{"header", json::array()}, {"values", json::array()}};
     168          10 :     bool check_first_row = true;
     169          10 :     bool has_header = (rows_to_skip == 1);
     170             : 
     171          10 :     if (csv_size > 3) {
     172             :         // UTF-8 Byte Order Mark
     173          10 :         if (csv[0] == '\xEF' && csv[1] == '\xBB' && csv[2] == '\xBF') {
     174           0 :             index += 3;
     175           0 :             index_into_cur_line += 3;
     176             :         }
     177             :     }
     178             : 
     179          10 :     if (rows_to_skip > 1) {
     180           1 :         skip_rows(csv, index);
     181             :     }
     182             : 
     183          10 :     json &header = root["header"];
     184          10 :     json &columns = root["values"];
     185             :     while (true) {
     186      271580 :         if (index == csv_size) {
     187          10 :             break;
     188             :         } else {
     189      271570 :             if (check_first_row) {
     190          10 :                 int num_columns = find_number_columns(csv, index);
     191          10 :                 check_first_row = !check_first_row;
     192             : 
     193         323 :                 for (int i = 0; i < num_columns; ++i) {
     194         626 :                     auto arr = std::vector<json>();
     195         313 :                     arr.reserve(8764 * 4);
     196         313 :                     columns.push_back(std::move(arr));
     197             :                 }
     198             : 
     199          10 :                 if (has_header) {
     200           9 :                     parse_header(csv, index, success, header);
     201             :                 }
     202          10 :                 continue;
     203             :             }
     204             : 
     205      271560 :             parse_line(csv, index, columns);
     206      271560 :             if (!success) {
     207           0 :                 auto found_index = csv.find_first_of('\n', beginning_of_line_index);
     208           0 :                 std::string line;
     209           0 :                 if (found_index != std::string::npos) {
     210           0 :                     line = csv.substr(beginning_of_line_index, found_index - beginning_of_line_index);
     211             :                 }
     212           0 :                 errors_.emplace_back(fmt::format("Line: {} Index: {} - Parsing Error. Error in following line.", cur_line_num, index_into_cur_line));
     213           0 :                 errors_.emplace_back(fmt::format("~~~ {}", line));
     214           0 :                 success = false;
     215           0 :                 continue;
     216             :             }
     217             :         }
     218      271570 :     }
     219             : 
     220          10 :     return root;
     221             : }
     222             : 
     223         612 : void CsvParser::parse_header(std::string_view csv, size_t &index, bool &success, json &header)
     224             : {
     225             :     Token token;
     226             : 
     227             :     while (true) {
     228        1215 :         token = look_ahead(csv, index);
     229         612 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     230           9 :             next_token(csv, index);
     231           9 :             return;
     232         603 :         } else if (token == Token::DELIMITER) {
     233         298 :             next_token(csv, index);
     234             :         } else {
     235         305 :             header.push_back(parse_value(csv, index));
     236         305 :             if (!success) return;
     237             :         }
     238             :     }
     239             : }
     240             : 
     241      271560 : void CsvParser::parse_line(std::string_view csv, size_t &index, json &columns)
     242             : {
     243             :     Token token;
     244      271560 :     int column_num = 0;
     245             : 
     246             :     while (true) {
     247    44369400 :         token = look_ahead(csv, index);
     248    22320480 :         if (token == Token::LINE_END || token == Token::FILE_END) {
     249      271560 :             next_token(csv, index);
     250      271560 :             return;
     251    22048920 :         } else if (token == Token::DELIMITER) {
     252    10923720 :             next_token(csv, index);
     253    10923720 :             ++column_num;
     254             :         } else {
     255    11125200 :             columns.at(column_num).push_back(parse_value(csv, index));
     256             :             //            if (!success) return;
     257             :         }
     258             :     }
     259             : }
     260             : 
     261             : // json CsvParser::parse_value(std::string_view csv, size_t &index, bool &success)
     262             : //{
     263             : //    Token token;
     264             : //    token = look_ahead(csv, index);
     265             : //
     266             : //    switch (token) {
     267             : //        case Token::STRING: {
     268             : //            return parse_string(csv, index);
     269             : //        }
     270             : //        case Token::Num: {
     271             : //            return parse_number(csv, index);
     272             : //        }
     273             : //        case Token::FILE_END:
     274             : //        case Token::LINE_END:
     275             : //        case Token::DELIMITER:
     276             : //        default:
     277             : //            break;
     278             : //    }
     279             : //    success = false;
     280             : //    return nullptr;
     281             : //}
     282             : 
     283    11125505 : json CsvParser::parse_value(std::string_view csv, size_t &index)
     284             : {
     285    11125505 :     eat_whitespace(csv, index);
     286             : 
     287    11125505 :     size_t save_i = index;
     288             : 
     289             :     while (true) {
     290   121194113 :         if (save_i == csv_size) {
     291           0 :             break;
     292             :         }
     293             : 
     294   121194113 :         char const c = csv[save_i];
     295   121194113 :         if (c == delimiter || c == '\n' || c == '\r') {
     296             :             break;
     297             :         }
     298   110068608 :         ++save_i;
     299   110068608 :     }
     300             : 
     301    11125505 :     auto diff = save_i - index;
     302    11125505 :     auto value = csv.substr(index, diff);
     303    11125505 :     index_into_cur_line += diff;
     304    11125505 :     index = save_i;
     305             : 
     306    11125505 :     size_t plus_sign = 0;
     307    11125505 :     if (value.front() == '+') {
     308           0 :         plus_sign = 1;
     309             :     }
     310             : 
     311    11125505 :     auto const value_end = value.data() + value.size(); // have to do this for MSVC
     312             : 
     313             :     double val;
     314    11125505 :     auto result = fast_float::from_chars(value.data() + plus_sign, value.data() + value.size(), val);
     315    11125505 :     if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) {
     316         305 :         return rtrim(value);
     317    11125200 :     } else if (result.ptr != value_end) {
     318      297840 :         auto const initial_ptr = result.ptr;
     319      367920 :         while (delimiter != ' ' && result.ptr != value_end) {
     320      297840 :             if (*result.ptr != ' ') {
     321      262800 :                 break;
     322             :             }
     323       35040 :             ++result.ptr;
     324             :         }
     325      297840 :         if (result.ptr == value_end) {
     326       35040 :             index -= (value_end - initial_ptr);
     327       35040 :             index_into_cur_line -= (value_end - initial_ptr);
     328       35040 :             return val;
     329             :         }
     330      262800 :         return rtrim(value);
     331             :     }
     332             :     //    double integral;
     333             :     //    double fractional = std::modf(val, &integral);
     334             :     //    if (fractional == 0) {
     335             :     //        return static_cast<int>(fractional);
     336             :     //    }
     337    10827360 :     return val;
     338             : 
     339             :     //    auto const convert_double = [](std::string_view str) -> json {
     340             :     //        double val;
     341             :     //        auto result = fast_float::from_chars(str.data(), str.data() + str.size(), val);
     342             :     //        if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range || result.ptr != str.end()) {
     343             :     //            return rtrim(str);
     344             :     //        }
     345             :     //        return val;
     346             :     //    };
     347             :     //
     348             :     //    auto const convert_int = [&convert_double](std::string_view str) -> json {
     349             :     //        int val;
     350             :     //        auto result = std::from_chars(str.data(), str.data() + str.size(), val);
     351             :     //        if (result.ec == std::errc::result_out_of_range) {
     352             :     //            return convert_double(str);
     353             :     //        } else if (result.ec == std::errc::invalid_argument) {
     354             :     //            if (*result.ptr == '.') {
     355             :     //                return convert_double(str);
     356             :     //            } else {
     357             :     //                return rtrim(str);
     358             :     //            }
     359             :     //        } else if (result.ptr != str.end()) {
     360             :     //            if (*result.ptr == '.' || *result.ptr == 'e' || *result.ptr == 'E') {
     361             :     //                return convert_double(str);
     362             :     //            } else {
     363             :     //                return rtrim(str);
     364             :     //            }
     365             :     //        }
     366             :     //        return val;
     367             :     //    };
     368             :     //
     369             :     //    return convert_int(value);
     370             : }
     371             : 
     372             : // std::string CsvParser::parse_string(std::string_view csv, size_t &index)
     373             : //{
     374             : //    eat_whitespace(csv, index);
     375             : //
     376             : //    std::string str;
     377             : //    char c;
     378             : //
     379             : //    while (true) {
     380             : //        if (index == csv_size) {
     381             : //            break;
     382             : //        }
     383             : //
     384             : //        c = csv[index];
     385             : //        increment_both_index(index, index_into_cur_line);
     386             : //        if (c == delimiter || c == '\n') {
     387             : //            decrement_both_index(index, index_into_cur_line);
     388             : //            break;
     389             : //        } else if (c == '\r') {
     390             : //            continue;
     391             : //        } else {
     392             : //            str += c;
     393             : //        }
     394             : //    }
     395             : //
     396             : //    return rtrim(str);
     397             : //}
     398             : 
     399    22321092 : CsvParser::Token CsvParser::look_ahead(std::string_view csv, size_t index)
     400             : {
     401    22321092 :     size_t save_index = index;
     402    22321092 :     size_t save_line_num = cur_line_num;
     403    22321092 :     size_t save_line_index = index_into_cur_line;
     404    22321092 :     size_t save_beginning_of_line_index = beginning_of_line_index;
     405    22321092 :     Token token = next_token(csv, save_index);
     406    22321092 :     cur_line_num = save_line_num;
     407    22321092 :     index_into_cur_line = save_line_index;
     408    22321092 :     beginning_of_line_index = save_beginning_of_line_index;
     409    22321092 :     return token;
     410             : }
     411             : 
     412    33524022 : CsvParser::Token CsvParser::next_token(std::string_view csv, size_t &index)
     413             : {
     414    33524022 :     eat_whitespace(csv, index);
     415             : 
     416    33524022 :     if (index == csv_size) {
     417           0 :         return Token::FILE_END;
     418             :     }
     419             : 
     420    33524022 :     char const c = csv[index];
     421    33524022 :     if (c == delimiter) {
     422    21848360 :         increment_both_index(index, index_into_cur_line);
     423    21848360 :         return Token::DELIMITER;
     424    11675662 :     } else if (c == '\n') {
     425      543152 :         increment_both_index(index, cur_line_num);
     426      543152 :         beginning_of_line_index = index;
     427      543152 :         index_into_cur_line = 0;
     428      543152 :         return Token::LINE_END;
     429             :     }
     430    11132510 :     increment_both_index(index, index_into_cur_line);
     431    11132510 :     return Token::VALUE;
     432             : }
     433             : 
     434      263105 : std::string_view CsvParser::rtrim(std::string_view str)
     435             : {
     436             :     static constexpr std::string_view whitespace(" \t", 2);
     437      263105 :     if (str.empty()) {
     438           0 :         return str;
     439             :     }
     440      263105 :     auto const index = str.find_last_not_of(whitespace);
     441      263105 :     if (index == std::string::npos) {
     442           0 :         str.remove_suffix(str.size());
     443           0 :         return str;
     444      263105 :     } else if (index + 1 < str.length()) {
     445           2 :         return str.substr(0, index + 1);
     446             :     }
     447      263103 :     return str;
     448             : }
     449             : 
     450    34120066 : void CsvParser::increment_both_index(size_t &index, size_t &line_index)
     451             : {
     452    34120066 :     index++;
     453    34120066 :     line_index++;
     454    34120066 : }
     455             : 
     456           0 : void CsvParser::decrement_both_index(size_t &index, size_t &line_index)
     457             : {
     458           0 :     index--;
     459           0 :     line_index--;
     460           0 : }
     461             : 
     462    45245571 : void CsvParser::eat_whitespace(std::string_view csv, size_t &index)
     463             : {
     464    45841615 :     while (index < csv_size) {
     465    45841615 :         if ((delimiter != ' ' && csv[index] == ' ') || (delimiter != '\t' && csv[index] == '\t') || csv[index] == '\r') {
     466      596044 :             increment_both_index(index, index_into_cur_line);
     467      596044 :             continue;
     468             :         } else {
     469    44649527 :             return;
     470             :         }
     471             :     }
     472             : }

Generated by: LCOV version 1.13