1234285Sdim//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim// 10234285Sdim// SequenceToOffsetTable can be used to emit a number of null-terminated 11234285Sdim// sequences as one big array. Use the same memory when a sequence is a suffix 12234285Sdim// of another. 13234285Sdim// 14234285Sdim//===----------------------------------------------------------------------===// 15234285Sdim 16234285Sdim#ifndef TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 17234285Sdim#define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 18234285Sdim 19234285Sdim#include "llvm/Support/raw_ostream.h" 20234285Sdim#include <algorithm> 21234285Sdim#include <cassert> 22234285Sdim#include <cctype> 23249423Sdim#include <functional> 24263508Sdim#include <map> 25249423Sdim#include <vector> 26234285Sdim 27234285Sdimnamespace llvm { 28234285Sdim 29234285Sdim/// SequenceToOffsetTable - Collect a number of terminated sequences of T. 30234285Sdim/// Compute the layout of a table that contains all the sequences, possibly by 31234285Sdim/// reusing entries. 32234285Sdim/// 33243830Sdim/// @tparam SeqT The sequence container. (vector or string). 34243830Sdim/// @tparam Less A stable comparator for SeqT elements. 35234285Sdimtemplate<typename SeqT, typename Less = std::less<typename SeqT::value_type> > 36234285Sdimclass SequenceToOffsetTable { 37234285Sdim typedef typename SeqT::value_type ElemT; 38234285Sdim 39234285Sdim // Define a comparator for SeqT that sorts a suffix immediately before a 40234285Sdim // sequence with that suffix. 41234285Sdim struct SeqLess : public std::binary_function<SeqT, SeqT, bool> { 42234285Sdim Less L; 43234285Sdim bool operator()(const SeqT &A, const SeqT &B) const { 44234285Sdim return std::lexicographical_compare(A.rbegin(), A.rend(), 45234285Sdim B.rbegin(), B.rend(), L); 46234285Sdim } 47234285Sdim }; 48234285Sdim 49234285Sdim // Keep sequences ordered according to SeqLess so suffixes are easy to find. 50234285Sdim // Map each sequence to its offset in the table. 51234285Sdim typedef std::map<SeqT, unsigned, SeqLess> SeqMap; 52234285Sdim 53234285Sdim // Sequences added so far, with suffixes removed. 54234285Sdim SeqMap Seqs; 55234285Sdim 56234285Sdim // Entries in the final table, or 0 before layout was called. 57234285Sdim unsigned Entries; 58234285Sdim 59234285Sdim // isSuffix - Returns true if A is a suffix of B. 60234285Sdim static bool isSuffix(const SeqT &A, const SeqT &B) { 61234285Sdim return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin()); 62234285Sdim } 63234285Sdim 64234285Sdimpublic: 65234285Sdim SequenceToOffsetTable() : Entries(0) {} 66234285Sdim 67234285Sdim /// add - Add a sequence to the table. 68234285Sdim /// This must be called before layout(). 69234285Sdim void add(const SeqT &Seq) { 70234285Sdim assert(Entries == 0 && "Cannot call add() after layout()"); 71234285Sdim typename SeqMap::iterator I = Seqs.lower_bound(Seq); 72234285Sdim 73234285Sdim // If SeqMap contains a sequence that has Seq as a suffix, I will be 74234285Sdim // pointing to it. 75234285Sdim if (I != Seqs.end() && isSuffix(Seq, I->first)) 76234285Sdim return; 77234285Sdim 78234285Sdim I = Seqs.insert(I, std::make_pair(Seq, 0u)); 79234285Sdim 80234285Sdim // The entry before I may be a suffix of Seq that can now be erased. 81234285Sdim if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) 82234285Sdim Seqs.erase(I); 83234285Sdim } 84234285Sdim 85239462Sdim bool empty() const { return Seqs.empty(); } 86243830Sdim 87234285Sdim /// layout - Computes the final table layout. 88234285Sdim void layout() { 89234285Sdim assert(Entries == 0 && "Can only call layout() once"); 90234285Sdim // Lay out the table in Seqs iteration order. 91234285Sdim for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E; 92234285Sdim ++I) { 93234285Sdim I->second = Entries; 94234285Sdim // Include space for a terminator. 95234285Sdim Entries += I->first.size() + 1; 96234285Sdim } 97234285Sdim } 98234285Sdim 99234285Sdim /// get - Returns the offset of Seq in the final table. 100234285Sdim unsigned get(const SeqT &Seq) const { 101234285Sdim assert(Entries && "Call layout() before get()"); 102234285Sdim typename SeqMap::const_iterator I = Seqs.lower_bound(Seq); 103234285Sdim assert(I != Seqs.end() && isSuffix(Seq, I->first) && 104234285Sdim "get() called with sequence that wasn't added first"); 105234285Sdim return I->second + (I->first.size() - Seq.size()); 106234285Sdim } 107234285Sdim 108234285Sdim /// emit - Print out the table as the body of an array initializer. 109234285Sdim /// Use the Print function to print elements. 110234285Sdim void emit(raw_ostream &OS, 111234285Sdim void (*Print)(raw_ostream&, ElemT), 112234285Sdim const char *Term = "0") const { 113234285Sdim assert(Entries && "Call layout() before emit()"); 114234285Sdim for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end(); 115234285Sdim I != E; ++I) { 116234285Sdim OS << " /* " << I->second << " */ "; 117234285Sdim for (typename SeqT::const_iterator SI = I->first.begin(), 118234285Sdim SE = I->first.end(); SI != SE; ++SI) { 119234285Sdim Print(OS, *SI); 120234285Sdim OS << ", "; 121234285Sdim } 122234285Sdim OS << Term << ",\n"; 123234285Sdim } 124234285Sdim } 125234285Sdim}; 126234285Sdim 127234285Sdim// Helper function for SequenceToOffsetTable<string>. 128234285Sdimstatic inline void printChar(raw_ostream &OS, char C) { 129234285Sdim unsigned char UC(C); 130234285Sdim if (isalnum(UC) || ispunct(UC)) { 131234285Sdim OS << '\''; 132234285Sdim if (C == '\\' || C == '\'') 133234285Sdim OS << '\\'; 134234285Sdim OS << C << '\''; 135234285Sdim } else { 136234285Sdim OS << unsigned(UC); 137234285Sdim } 138234285Sdim} 139234285Sdim 140234285Sdim} // end namespace llvm 141234285Sdim 142234285Sdim#endif 143