Tänd Taklampan Först

A nice property of Common Lisp is its macro system, which makes things like cl-who possible: it lets you embed HTML, written in Lisp syntax, directly in your code.

With the new features of C++11, I wrote this little code snippet which does something similar:

/* we assume everything utf-8 */

#include <cassert>
#include <deque>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>

// A single name="value" attribute of a tag.
class Attr {
public:
  std::string _name;
  std::string _value;

  Attr (std::string const& n, const char* v)
    : _name(n), _value(v) {}

  // Appends `name="value"` to ss. The name is written unchanged; in the
  // value the characters & " < > are replaced by their entities.
  void text_(std::stringstream& ss) const {
    ss << _name << "=\"";
    // NOTICE: this works only with UTF-8
    for (std::string::const_iterator it = _value.begin();
         it != _value.end(); ++it) {
      const char ch = *it;
      if (ch == '&') {
        ss << "&amp;";
      } else if (ch == '"') {
        ss << "&quot;";
      } else if (ch == '<') {
        ss << "&lt;";
      } else if (ch == '>') {
        ss << "&gt;";
      } else {
        ss << ch;
      }
    }
    ss << '"';
  }

  // Returns what text_() would append, as a fresh string.
  std::string text() const {
    std::stringstream buffer;
    text_(buffer);
    buffer.flush();
    return buffer.str();
  }
};

// An attribute name that is still waiting for its value. It is the result
// of the "name"_att literal; "assigning" a C string to it produces the
// finished Attr.
class AttrTrunk {
public:
  std::string _name;

  // The name is only read, so it is taken by const reference; this accepts
  // const strings and temporaries in addition to non-const lvalues.
  AttrTrunk(const std::string& n) : _name(n) {}

  // Not an assignment in the usual sense: the trunk is left untouched and
  // a new Attr pairing _name with the value `a` is returned.
  Attr operator= (const char* a) const {
    return Attr(_name, a);
  }
};

class AttrList {
public:
  std::deque<Attr> attributes;
  AttrList () {};
  void text_ (std::stringstream& ss) const {
    for (Attr const& c : attributes) {
      c.text_(ss);
      ss << " ";
    }
  }
  std::string text () const {
    std::stringstream ss;
    text_(ss);
    ss.flush();
    return ss.str();
  }
};

// Joins two attributes into a new two-element list: a first, then b.
AttrList operator , (Attr const& a, Attr const& b) {
  AttrList joined;
  joined.attributes.emplace_back(a);
  joined.attributes.emplace_back(b);
  return joined;
}

// Appends b to an existing (named) attribute list in place and returns
// that same list.
AttrList& operator , (AttrList& a, Attr const& b) {
  a.attributes.push_back(b);
  return a;
}

// Same, for a temporary list. In a chain such as `x, y, z` the
// sub-expression `x, y` is a temporary AttrList, which cannot bind to the
// AttrList& overload above. Without this overload no user-defined comma is
// viable, the built-in comma operator is used instead, and every attribute
// except the last one is silently dropped.
AttrList operator , (AttrList&& a, Attr const& b) {
  a.attributes.push_back(b);
  return std::move(a);
}

// User-defined literal: "name"_att yields an AttrTrunk holding the literal's
// `len` characters as the attribute name.
// Spelled without a space between "" and the suffix: the spaced form makes
// _att an ordinary identifier (reserved at global scope) and is deprecated.
AttrTrunk operator""_att (const char* a, size_t len) {
  // Kept as a named string so it can be passed wherever an lvalue is needed.
  std::string name(a, len);
  return AttrTrunk(name);
}

// Forward declaration so that std::deque<Tag> can be named before the
// definition of Tag.
class Tag;

// Shared empty child list. Tag's cleartext constructors copy it into _tags,
// and main() passes it as `[empty]` to get a tag with no children.
// NOTE(review): std::deque<Tag> is instantiated here while Tag is still an
// incomplete type; the standard does not guarantee that for std::deque, so
// confirm this builds on every standard library you target.
std::deque<Tag> empty;
// Shared empty attribute list; Tag's cleartext constructors copy it into
// _attrs.
AttrList empty_attr;

class Tag {
public:
  std::string _name; // or cleartext
  AttrList _attrs;
  std::deque<Tag> _tags;
  bool _empty;
  bool _text;
  bool _as_is;
  // _empty means that the tag should be of the form <bla />. If it is
  // not set, it will be <bla></bla>. We distinguish this, because
  // <script ... /> does not work everywhere.
  Tag (std::string& n, AttrList& attrs, std::deque<Tag>& subtags, bool empty) :
    _name(n), _attrs(attrs), _tags(subtags), _empty(empty), _text(false), _as_is(false) {
    assert (!empty || subtags.size() == 0);
  }

  Tag (const char* cleartext)
    : _tags(empty), _attrs(empty_attr), _text(true), _name(cleartext), _as_is(false) {}

  Tag (std::string& cleartext)
    : _tags(empty), _attrs(empty_attr), _text(true),
      _name(cleartext), _as_is(false) {}

  static Tag as_is (const char* text) {
    Tag ret(text);
    ret._as_is = true;
    return ret;
  }

  void text_ (std::stringstream& ss) const {
    if (_as_is) {
      ss << _name;
    } else if (_text) {
      for (char c : _name) {
        switch (c) {
        case '<':
          ss << "&lt;";
          break;
        case '>':
          ss << "&gt;";
          break;
        case '&':
          ss << "&amp;";
          break;
        default:
          ss << c;
        }
      }
    } else {
      ss << "<" << _name << " ";
      _attrs.text_(ss);
      if (_empty) {
        ss << " />";
      } else {
        ss << ">";
        for (Tag const& tg : _tags) {
          tg.text_(ss);
        }
        ss << "</" << _name << ">";
      }
    }
  }
  std::string text() const {
    std::stringstream ss;
    text_(ss);
    ss.flush();
    return ss.str();
  }
};

// An element whose name and (possibly empty) attribute list are already
// fixed but which has not been given any content yet.
class TagTrunk2 {
public:
  std::string _name;
  AttrList _attrs;

  TagTrunk2 (std::string n, AttrList al)
    : _name(n), _attrs(al) {}

  // operator[] always produces the non-empty form, even when the list
  // passed in has no elements; operator() produces the empty form. See
  // class Tag for why the two are distinguished.

  // Finishes the element with the given children.
  Tag operator [] (std::deque<Tag> subtags) {
    return Tag(_name, _attrs, subtags, false);
  }

  // Finishes the element with exactly one child.
  Tag operator [] (Tag subtag) {
    std::deque<Tag> children(1, subtag);
    return Tag(_name, _attrs, children, false);
  }

  // Finishes the element as an empty tag without children.
  Tag operator () () {
    std::deque<Tag> none;
    return Tag(_name, _attrs, none, true);
  }
};

// Joins two tags into a new two-element child list: a first, then b.
std::deque<Tag> operator , (Tag a, Tag b) {
  std::deque<Tag> joined;
  joined.emplace_back(a);
  joined.emplace_back(b);
  return joined;
}

std::deque<Tag>& operator , (std::deque<Tag>& a, Tag b) {
  a.push_back(b);
  return a;
}

// An element of which only the name is known so far: neither attribute
// list nor content has been supplied.
class TagTrunk1 {
public:
  std::string _name;

  TagTrunk1 (std::string n) : _name(n) {}

  // Attaches a complete attribute list.
  TagTrunk2 operator [] (AttrList a) {
    return TagTrunk2(_name, a);
  }

  // Attaches exactly one attribute.
  TagTrunk2 operator [] (Attr a) {
    AttrList single;
    single.attributes.push_back(a);
    return TagTrunk2(_name, single);
  }

  // Attaches an empty attribute list; use this when no attributes are
  // present.
  TagTrunk2 operator () () {
    return TagTrunk2(_name, AttrList());
  }
};

// User-defined literal: "name"_tag yields a TagTrunk1 holding the literal's
// `length` characters as the element name.
// Spelled without a space between "" and the suffix: the spaced form makes
// _tag an ordinary identifier (reserved at global scope) and is deprecated.
TagTrunk1 operator""_tag (const char* a, size_t length) {
  return TagTrunk1(std::string(a, length));
}

// User-defined literal: "text"_txt yields a cleartext Tag holding the
// literal's `length` characters.
// Spelled without a space between "" and the suffix: the spaced form makes
// _txt an ordinary identifier (reserved at global scope) and is deprecated.
Tag operator""_txt (const char* a, size_t length) {
  // Kept as a named string so it can be passed wherever an lvalue is needed.
  std::string text(a, length);
  return Tag(text);
}

// User-defined literal: "text"_as_is yields a raw Tag whose text is written
// without any escaping.
// The text is built from all `length` characters of the literal, as the
// other literal operators do, rather than stopping at the first NUL.
// Spelled without a space between "" and the suffix: the spaced form makes
// _as_is an ordinary identifier (reserved at global scope) and is deprecated.
Tag operator""_as_is (const char* a, size_t length) {
  // Kept as a named string so it can be passed wherever an lvalue is needed.
  std::string raw(a, length);
  Tag ret(raw);
  ret._as_is = true;
  return ret;
}

// Demo: builds a small HTML document with the literal/operator DSL and
// prints its text form to standard output.
int main () {

  // Every comma-joined list is wrapped in its own parentheses before being
  // passed to operator[]. A bare comma expression inside [] is deprecated
  // since C++20 and is read as several subscript arguments from C++23 on;
  // the parentheses keep the meaning "one list built by operator,".
  std::cout <<
    ( "html"_tag ()
      [("head"_tag ()
        [("title"_tag () [ "Meine Seite"_txt ],
          "script"_tag [("type"_att = "text/javascript",
                         "src"_att = "hallo.js")][empty])],
        "body"_tag () [("Hallo Welt!"_txt, "<![CDATA[ bla ]]>"_as_is)])] ).text() << std::endl;

}

Have fun. Regard the code as public domain. You C++-Nerds, please make some nice library out of it that does encoding and collation and shit correctly and release it freely so I can use it.