Kofi Kofi - 27 days ago 9
C++ Question

Parsing a structured text using boost spirit

Once more, I would like to ask for your help. Using the Boost.Spirit library, I would like to parse the syntax below into the struct "unitConstruct". So far my parser fails and is not able to parse this syntax correctly. Your help will be highly appreciated. Please find a snapshot of my code below.

The syntax will always follow this format, as it is based on a standard. I would like to store UNIT in key (as shown in the struct unitConstruct), store TB_SENSOR_PRIMARY_VALUE_UNIT in identifier, and finally store the tokens in the vector (variablereferences). This pattern is required to uniquely retrieve this information later in my code.

// Syntax to be parsed
UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
trans1_primary_value,
trans1_scale_out_lower_value,
trans1_scale_out_upper_value,
func1_AI_pv_upper_range_value,
func1_AI_pv_lower_range_value,
func1_AI_simulate_value
}

//structure to store the syntax above
// Target structure for one parsed block of the syntax shown above.
struct unitConstruct
{
// Keyword that opens the block ("UNIT" in the sample input).
std::string key;
// Name following the keyword (TB_SENSOR_PRIMARY_VALUE_UNIT in the sample).
std::string identifier;
// Tokens listed between the braces of the block.
std::vector<std::string> variablereferences;
};


// code snapshot
// Result of a complete parse: one unitConstruct per parsed block.
typedef std::vector<unitConstruct> eddlParsedData;
// Skipper grammar for the parser below: skips ascii::blank characters only,
// so line breaks stay visible to the grammar that uses it.
template <typename Iterator>
struct skipper
    : qi::grammar<Iterator>
{
    skipper()
        : skipper::base_type(start)
    {
        start = ascii::blank;
    }

private:
    qi::rule<Iterator> start;
    // Declared but never assigned in this snippet.
    qi::rule<Iterator> comment;
};

// Grammar from the question: meant to parse a list of UNIT blocks into
// eddlParsedData, using the blank-only skipper declared above.
// NOTE(review): the snippet looks truncated - there is no closing `};` and no
// declaration of the `start` rule that the constructor uses.
template <typename Iterator>
struct eddlparser : qi::grammar<Iterator, eddlParsedData(), skipper<Iterator> >
{
eddlparser() : eddlparser::base_type(start)
{
// Matches the literal keyword and exposes it as the string attribute.
unitkey = qi::string("UNIT");
// One or more qi::graph characters.
// NOTE(review): `text` is declared with the skipper, so blanks between
// characters are presumably skipped rather than ending the token - confirm.
text = +qi::graph;
// Keyword, name, then a brace-delimited body with explicit line breaks.
// NOTE(review): `+text >> qi::eol >> '}'` leaves room for a single line break
// between the body tokens and the closing brace, and the skipper does not
// consume line breaks, so the multi-line body of the sample input presumably
// cannot match - confirm.
unit = unitkey >> text >> qi::eol
>> '{' >> qi::eol
>> +text >> qi::eol
>> '}' ;

BOOST_SPIRIT_DEBUG_NODE(unit);
// A list of units separated by any number of line breaks.
start = (unit) % *qi::eol;
}

private:
// NOTE(review): `uni` is not used anywhere in the visible snippet.
qi::rule<Iterator, std::string(), skipper<Iterator> > uni, unitkey, text;
qi::rule<Iterator, unitConstruct(), skipper<Iterator> > unit;


// Adapt unitConstruct as a Boost.Fusion sequence so that Qi can fill its
// members, in the order listed here, from a rule's synthesized attribute.
// NOTE(review): the macro has to be invoked at global namespace scope; in this
// snippet it follows the (unclosed) grammar struct - confirm placement in the
// real file.
BOOST_FUSION_ADAPT_STRUCT(
unitConstruct,
(std::string, key)
(std::string, identifier)
(std::vector<std::string>, variablereferences)
)

Answer

Let's start with your example and the details we discussed in comments. You want to parse a definition that looks as follows.

UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
  • UNIT specifies definition type. Only upper case is allowed, and we desire to only use predefined keywords (just "UNIT" at this point).
  • TB_SENSOR_PRIMARY_VALUE_UNIT specifies definition name. It may contain upper case letters, digits or an underscore. The first symbol may not be a digit.
  • Identifiers may consist of lower and upper case letters, digits or underscores. The first symbol may not be a digit.
  • trans1_primary_value_unit specifies a single dependent. It is an identifier.
  • trans1_primary_value, ... func1_AI_simulate_value specify dependencies. Each dependency is an identifier. Multiple dependencies are separated by commas.

Based on this information, we need a grammar that looks as follows (in EBNF).

type = "UNIT"
name = ( upper | "_" ), { upper | digit | "_" }
identifier = ( upper | lower | "_" ), { upper | lower | digit | "_" }
dependent = identifier
dependency = identifier
dependencies = dependency, { ",", dependency }
definition = type, name, "{", dependent, ":", dependencies, "}"

We can use phrase parsing with a skipper that ignores spaces, tabs, newlines and carriage returns, since whitespace between tokens is not significant in this grammar.

Source Code

Live on Coliru

#include <cstdlib>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
// ======================================================================
// Sample definition fed to the parser by main(). It is a fixed fixture, so it
// is declared const to rule out accidental modification.
const std::string TEST_INPUT = R"(UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
)";
// ======================================================================
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// ======================================================================
// Parsed representation of one definition block:
//     TYPE NAME { dependent : dependency, dependency, ... }
struct definition
{
    // Definition keyword ("UNIT" in the sample input).
    std::string type;
    // Name of the definition, the token following the keyword.
    std::string name;
    // The single identifier that appears before the ':' inside the braces.
    std::string dependent;
    // Comma-separated identifiers that follow the ':'.
    std::vector<std::string> dependencies;
};
// ======================================================================
// Expose `definition` as a Boost.Fusion sequence so that Qi can store the
// grammar's attribute into it member by member. The members are listed in the
// same order in which def_parser's start rule produces its attributes
// (type, name, dependent, dependencies).
BOOST_FUSION_ADAPT_STRUCT(
    definition,
    (std::string, type)
    (std::string, name)
    (std::string, dependent)
    (std::vector<std::string>, dependencies)
)
// ======================================================================
// Skipper grammar handed to qi::phrase_parse: everything matched by
// ascii::space is skipped between tokens.
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(whitespace)
    {
        // The rule has no attribute and no semantic action, so plain
        // assignment is all that is needed.
        whitespace = ascii::space;
    }

private:
    qi::rule<Iterator> whitespace;
};
// ======================================================================
// Grammar for a single definition:
//
//     type NAME { dependent : dependency { , dependency } }
//
// The synthesized attribute is a `definition`; whitespace between tokens is
// handled by `skipper<Iterator>`.
//
// Token rules (def_type, def_name, identifier) are wrapped in qi::lexeme[] so
// that the skipper is switched off *inside* a token: "foo bar" is two tokens,
// not the identifier "foobar".
//
// Underscores are matched through ascii::char_ character sets. A bare '_'
// literal in a Qi expression turns into qi::lit('_'), which matches the
// character but exposes no attribute, so the underscore would be missing from
// the stored string.
template <typename Iterator>
struct def_parser
    : qi::grammar<Iterator, definition(), skipper<Iterator> >
{
    def_parser()
        : def_parser::base_type(start)
    {
        // Keyword. The negative look-ahead rejects longer words that merely
        // start with "UNIT" (e.g. "UNITS" or "UNIT_X").
        def_type %= qi::lexeme[
            qi::string("UNIT") >> !ascii::char_("A-Za-z0-9_")
        ];

        // Upper-case letters, digits and '_'; must not start with a digit.
        def_name %= qi::lexeme[
            ascii::char_("A-Z_") >> *ascii::char_("A-Z0-9_")
        ];

        // Letters, digits and '_'; must not start with a digit.
        identifier %= qi::lexeme[
            ascii::char_("A-Za-z_") >> *ascii::char_("A-Za-z0-9_")
        ];

        def_dependent %= identifier;
        def_dependency %= identifier;
        // One or more dependencies separated by commas.
        def_dependencies %= def_dependency % qi::lit(",");

        // The order of the attribute-producing parsers matches the member
        // order of `definition` (type, name, dependent, dependencies).
        start %= def_type
            >> def_name
            >> qi::lit("{")
            >> def_dependent
            >> qi::lit(":")
            >> def_dependencies
            >> qi::lit("}")
            ;

        init_debug();
    }

    // Names every rule and switches on Qi's debug tracing for it.
    void init_debug()
    {
        def_type.name("def_type");
        def_name.name("def_name");
        identifier.name("identifier");
        def_dependent.name("def_dependent");
        def_dependency.name("def_dependency");
        def_dependencies.name("def_dependencies");
        start.name("start");

        qi::debug(def_type);
        qi::debug(def_name);
        qi::debug(identifier);
        qi::debug(def_dependent);
        qi::debug(def_dependency);
        qi::debug(def_dependencies);
        qi::debug(start);
    }

private:
    qi::rule<Iterator, std::string(), skipper<Iterator>> def_type;
    qi::rule<Iterator, std::string(), skipper<Iterator>> def_name;
    qi::rule<Iterator, std::string(), skipper<Iterator>> identifier;
    qi::rule<Iterator, std::string(), skipper<Iterator>> def_dependent;
    qi::rule<Iterator, std::string(), skipper<Iterator>> def_dependency;
    qi::rule<Iterator, std::vector<std::string>(), skipper<Iterator>> def_dependencies;

    qi::rule<Iterator, definition(), skipper<Iterator>> start;
};
// ======================================================================
// Parses TEST_INPUT with def_parser and prints the extracted fields.
//
// Returns EXIT_SUCCESS only when the grammar matched and the whole input was
// consumed; a failed or partial parse yields EXIT_FAILURE so that callers
// (scripts, CI) can detect it. The printed text is the same in every case as
// before.
int main()
{
    using iterator_type = std::string::const_iterator;

    def_parser<iterator_type> g;
    skipper<iterator_type> s;

    definition d;

    iterator_type iter = TEST_INPUT.cbegin();
    const iterator_type end = TEST_INPUT.cend();

    const bool matched = qi::phrase_parse(iter, end, g, s, d);
    if (!matched) {
        std::cout << "FAILED COMPLETELY\n";
        return EXIT_FAILURE;
    }

    // phrase_parse reports success as soon as the grammar matched a prefix,
    // so leftover input has to be checked separately.
    const bool consumed_all = (iter == end);

    std::cout << "Bytes left = " << std::distance(iter, end) << " -> "
        << (consumed_all ? "SUCCEEDED" : "FAILED") << "\n";
    std::cout << "Type = " << d.type << "\n";
    std::cout << "Name = " << d.name << "\n";
    std::cout << "Dependent = " << d.dependent << "\n";
    for (auto const& ref : d.dependencies) {
        std::cout << "Dependency = " << ref << "\n";
    }

    return consumed_all ? EXIT_SUCCESS : EXIT_FAILURE;
}
// ======================================================================

Debug Output

<start>
  <try>UNIT TB_SENSOR_PRIMA</try>
  <def_type>
    <try>UNIT TB_SENSOR_PRIMA</try>
    <success> TB_SENSOR_PRIMARY_V</success>
    <attributes>[[U, N, I, T]]</attributes>
  </def_type>
  <def_name>
    <try> TB_SENSOR_PRIMARY_V</try>
    <success>{\ntrans1_primary_val</success>
    <attributes>[[T, B, , S, E, N, S, O, R, , P, R, I, M, A, R, Y, , V, A, L, U, E, , U, N, I, T]]</attributes>
  </def_name>
  <def_dependent>
    <try>\ntrans1_primary_valu</try>
    <identifier>
      <try>\ntrans1_primary_valu</try>
      <success>:\n    trans1_primary</success>
      <attributes>[[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e, , u, n, i, t]]</attributes>
    </identifier>
    <success>:\n    trans1_primary</success>
    <attributes>[[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e, , u, n, i, t]]</attributes>
  </def_dependent>
  <def_dependencies>
    <try>\n    trans1_primary_</try>
    <def_dependency>
      <try>\n    trans1_primary_</try>
      <identifier>
        <try>\n    trans1_primary_</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , l, o, w, e, r, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , l, o, w, e, r, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    func1_AI_pv_up</success>
        <attributes>[[t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , u, p, p, e, r, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_up</success>
      <attributes>[[t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , u, p, p, e, r, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_upp</try>
      <identifier>
        <try>\n    func1_AI_pv_upp</try>
        <success>,\n    func1_AI_pv_lo</success>
        <attributes>[[f, u, n, c, 1, , A, I, , p, v, , u, p, p, e, r, , r, a, n, g, e, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_lo</success>
      <attributes>[[f, u, n, c, 1, , A, I, , p, v, , u, p, p, e, r, , r, a, n, g, e, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_low</try>
      <identifier>
        <try>\n    func1_AI_pv_low</try>
        <success>,\n    func1_AI_simul</success>
        <attributes>[[f, u, n, c, 1, , A, I, , p, v, , l, o, w, e, r, , r, a, n, g, e, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_simul</success>
      <attributes>[[f, u, n, c, 1, , A, I, , p, v, , l, o, w, e, r, , r, a, n, g, e, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_simula</try>
      <identifier>
        <try>\n    func1_AI_simula</try>
        <success>}\n</success>
        <attributes>[[f, u, n, c, 1, , A, I, , s, i, m, u, l, a, t, e, , v, a, l, u, e]]</attributes>
      </identifier>
      <success>}\n</success>
      <attributes>[[f, u, n, c, 1, , A, I, , s, i, m, u, l, a, t, e, , v, a, l, u, e]]</attributes>
    </def_dependency>
    <success>}\n</success>
    <attributes>[[[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e], [t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , l, o, w, e, r, , v, a, l, u, e], [t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , u, p, p, e, r, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , p, v, , u, p, p, e, r, , r, a, n, g, e, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , p, v, , l, o, w, e, r, , r, a, n, g, e, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , s, i, m, u, l, a, t, e, , v, a, l, u, e]]]</attributes>
  </def_dependencies>
  <success>\n</success>
  <attributes>[[[U, N, I, T], [T, B, , S, E, N, S, O, R, , P, R, I, M, A, R, Y, , V, A, L, U, E, , U, N, I, T], [t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e, , u, n, i, t], [[t, r, a, n, s, 1, , p, r, i, m, a, r, y, , v, a, l, u, e], [t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , l, o, w, e, r, , v, a, l, u, e], [t, r, a, n, s, 1, , s, c, a, l, e, , o, u, t, , u, p, p, e, r, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , p, v, , u, p, p, e, r, , r, a, n, g, e, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , p, v, , l, o, w, e, r, , r, a, n, g, e, , v, a, l, u, e], [f, u, n, c, 1, , A, I, , s, i, m, u, l, a, t, e, , v, a, l, u, e]]]]</attributes>
</start>

Console Output

Bytes left = 0 -> SUCCEEDED
Type = UNIT
Name = TBSENSORPRIMARYVALUEUNIT
Dependent = trans1primaryvalueunit
Dependency = trans1primaryvalue
Dependency = trans1scaleoutlowervalue
Dependency = trans1scaleoutuppervalue
Dependency = func1AIpvupperrangevalue
Dependency = func1AIpvlowerrangevalue
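Dependency = func1AIsimulatevalue

Note that the underscores are missing from every parsed name. This output was captured with '_' written as a bare character literal in the def_name and identifier rules. In a Qi expression such a literal becomes qi::lit('_'), which matches the character but does not expose it as an attribute (the debug trace above shows empty slots where the underscores should be). Matching the underscore with qi::char_('_') or a character set such as ascii::char_("A-Z_"), ideally inside qi::lexeme[], keeps it in the result.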

Parsing Multiple Definitions

We already have a grammar for a single definition. To parse multiple definitions, we just reuse this.

Let's make some minor modifications to the code:

  • std::vector<definition> d;
  • bool r = qi::phrase_parse(iter, end, +g, s, d);

Live on Coliru