1234285Sdim//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim// 10234285Sdim// SequenceToOffsetTable can be used to emit a number of null-terminated 11234285Sdim// sequences as one big array. Use the same memory when a sequence is a suffix 12234285Sdim// of another. 13234285Sdim// 14234285Sdim//===----------------------------------------------------------------------===// 15234285Sdim 16234285Sdim#ifndef TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 17234285Sdim#define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 18234285Sdim 19234285Sdim#include "llvm/Support/raw_ostream.h" 20234285Sdim#include <algorithm> 21234285Sdim#include <cassert> 22234285Sdim#include <cctype> 23249423Sdim#include <functional> 24249423Sdim#include <vector> 25234285Sdim 26234285Sdimnamespace llvm { 27234285Sdim 28234285Sdim/// SequenceToOffsetTable - Collect a number of terminated sequences of T. 29234285Sdim/// Compute the layout of a table that contains all the sequences, possibly by 30234285Sdim/// reusing entries. 31234285Sdim/// 32243830Sdim/// @tparam SeqT The sequence container. (vector or string). 33243830Sdim/// @tparam Less A stable comparator for SeqT elements. 34234285Sdimtemplate<typename SeqT, typename Less = std::less<typename SeqT::value_type> > 35234285Sdimclass SequenceToOffsetTable { 36234285Sdim typedef typename SeqT::value_type ElemT; 37234285Sdim 38234285Sdim // Define a comparator for SeqT that sorts a suffix immediately before a 39234285Sdim // sequence with that suffix. 40234285Sdim struct SeqLess : public std::binary_function<SeqT, SeqT, bool> { 41234285Sdim Less L; 42234285Sdim bool operator()(const SeqT &A, const SeqT &B) const { 43234285Sdim return std::lexicographical_compare(A.rbegin(), A.rend(), 44234285Sdim B.rbegin(), B.rend(), L); 45234285Sdim } 46234285Sdim }; 47234285Sdim 48234285Sdim // Keep sequences ordered according to SeqLess so suffixes are easy to find. 49234285Sdim // Map each sequence to its offset in the table. 50234285Sdim typedef std::map<SeqT, unsigned, SeqLess> SeqMap; 51234285Sdim 52234285Sdim // Sequences added so far, with suffixes removed. 53234285Sdim SeqMap Seqs; 54234285Sdim 55234285Sdim // Entries in the final table, or 0 before layout was called. 56234285Sdim unsigned Entries; 57234285Sdim 58234285Sdim // isSuffix - Returns true if A is a suffix of B. 59234285Sdim static bool isSuffix(const SeqT &A, const SeqT &B) { 60234285Sdim return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin()); 61234285Sdim } 62234285Sdim 63234285Sdimpublic: 64234285Sdim SequenceToOffsetTable() : Entries(0) {} 65234285Sdim 66234285Sdim /// add - Add a sequence to the table. 67234285Sdim /// This must be called before layout(). 68234285Sdim void add(const SeqT &Seq) { 69234285Sdim assert(Entries == 0 && "Cannot call add() after layout()"); 70234285Sdim typename SeqMap::iterator I = Seqs.lower_bound(Seq); 71234285Sdim 72234285Sdim // If SeqMap contains a sequence that has Seq as a suffix, I will be 73234285Sdim // pointing to it. 74234285Sdim if (I != Seqs.end() && isSuffix(Seq, I->first)) 75234285Sdim return; 76234285Sdim 77234285Sdim I = Seqs.insert(I, std::make_pair(Seq, 0u)); 78234285Sdim 79234285Sdim // The entry before I may be a suffix of Seq that can now be erased. 80234285Sdim if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) 81234285Sdim Seqs.erase(I); 82234285Sdim } 83234285Sdim 84239462Sdim bool empty() const { return Seqs.empty(); } 85243830Sdim 86234285Sdim /// layout - Computes the final table layout. 87234285Sdim void layout() { 88234285Sdim assert(Entries == 0 && "Can only call layout() once"); 89234285Sdim // Lay out the table in Seqs iteration order. 90234285Sdim for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E; 91234285Sdim ++I) { 92234285Sdim I->second = Entries; 93234285Sdim // Include space for a terminator. 94234285Sdim Entries += I->first.size() + 1; 95234285Sdim } 96234285Sdim } 97234285Sdim 98234285Sdim /// get - Returns the offset of Seq in the final table. 99234285Sdim unsigned get(const SeqT &Seq) const { 100234285Sdim assert(Entries && "Call layout() before get()"); 101234285Sdim typename SeqMap::const_iterator I = Seqs.lower_bound(Seq); 102234285Sdim assert(I != Seqs.end() && isSuffix(Seq, I->first) && 103234285Sdim "get() called with sequence that wasn't added first"); 104234285Sdim return I->second + (I->first.size() - Seq.size()); 105234285Sdim } 106234285Sdim 107234285Sdim /// emit - Print out the table as the body of an array initializer. 108234285Sdim /// Use the Print function to print elements. 109234285Sdim void emit(raw_ostream &OS, 110234285Sdim void (*Print)(raw_ostream&, ElemT), 111234285Sdim const char *Term = "0") const { 112234285Sdim assert(Entries && "Call layout() before emit()"); 113234285Sdim for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end(); 114234285Sdim I != E; ++I) { 115234285Sdim OS << " /* " << I->second << " */ "; 116234285Sdim for (typename SeqT::const_iterator SI = I->first.begin(), 117234285Sdim SE = I->first.end(); SI != SE; ++SI) { 118234285Sdim Print(OS, *SI); 119234285Sdim OS << ", "; 120234285Sdim } 121234285Sdim OS << Term << ",\n"; 122234285Sdim } 123234285Sdim } 124234285Sdim}; 125234285Sdim 126234285Sdim// Helper function for SequenceToOffsetTable<string>. 127234285Sdimstatic inline void printChar(raw_ostream &OS, char C) { 128234285Sdim unsigned char UC(C); 129234285Sdim if (isalnum(UC) || ispunct(UC)) { 130234285Sdim OS << '\''; 131234285Sdim if (C == '\\' || C == '\'') 132234285Sdim OS << '\\'; 133234285Sdim OS << C << '\''; 134234285Sdim } else { 135234285Sdim OS << unsigned(UC); 136234285Sdim } 137234285Sdim} 138234285Sdim 139234285Sdim} // end namespace llvm 140234285Sdim 141234285Sdim#endif 142