SequenceToOffsetTable.h revision 239462
1117395Skan//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===// 2169689Skan// 3169689Skan// The LLVM Compiler Infrastructure 490075Sobrien// 590075Sobrien// This file is distributed under the University of Illinois Open Source 690075Sobrien// License. See LICENSE.TXT for details. 790075Sobrien// 890075Sobrien//===----------------------------------------------------------------------===// 990075Sobrien// 1090075Sobrien// SequenceToOffsetTable can be used to emit a number of null-terminated 1190075Sobrien// sequences as one big array. Use the same memory when a sequence is a suffix 1290075Sobrien// of another. 1390075Sobrien// 1490075Sobrien//===----------------------------------------------------------------------===// 1590075Sobrien 1690075Sobrien#ifndef TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 1790075Sobrien#define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H 1890075Sobrien 1990075Sobrien#include "llvm/Support/raw_ostream.h" 20169689Skan#include <functional> 21169689Skan#include <algorithm> 2290075Sobrien#include <vector> 2390075Sobrien#include <cassert> 2490075Sobrien#include <cctype> 2590075Sobrien 2690075Sobriennamespace llvm { 2790075Sobrien 2890075Sobrien/// SequenceToOffsetTable - Collect a number of terminated sequences of T. 2990075Sobrien/// Compute the layout of a table that contains all the sequences, possibly by 3090075Sobrien/// reusing entries. 31132718Skan/// 32132718Skan/// @param SeqT The sequence container. (vector or string). 3390075Sobrien/// @param Less A stable comparator for SeqT elements. 34169689Skantemplate<typename SeqT, typename Less = std::less<typename SeqT::value_type> > 3590075Sobrienclass SequenceToOffsetTable { 3690075Sobrien typedef typename SeqT::value_type ElemT; 3790075Sobrien 3890075Sobrien // Define a comparator for SeqT that sorts a suffix immediately before a 3990075Sobrien // sequence with that suffix. 4090075Sobrien struct SeqLess : public std::binary_function<SeqT, SeqT, bool> { 41117395Skan Less L; 42117395Skan bool operator()(const SeqT &A, const SeqT &B) const { 43169689Skan return std::lexicographical_compare(A.rbegin(), A.rend(), 4490075Sobrien B.rbegin(), B.rend(), L); 4590075Sobrien } 4690075Sobrien }; 47132718Skan 4890075Sobrien // Keep sequences ordered according to SeqLess so suffixes are easy to find. 49132718Skan // Map each sequence to its offset in the table. 50132718Skan typedef std::map<SeqT, unsigned, SeqLess> SeqMap; 51132718Skan 52132718Skan // Sequences added so far, with suffixes removed. 5390075Sobrien SeqMap Seqs; 54132718Skan 55132718Skan // Entries in the final table, or 0 before layout was called. 56132718Skan unsigned Entries; 57132718Skan 58132718Skan // isSuffix - Returns true if A is a suffix of B. 59132718Skan static bool isSuffix(const SeqT &A, const SeqT &B) { 6090075Sobrien return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin()); 6190075Sobrien } 6290075Sobrien 6390075Sobrienpublic: 6490075Sobrien SequenceToOffsetTable() : Entries(0) {} 6590075Sobrien 66132718Skan /// add - Add a sequence to the table. 67132718Skan /// This must be called before layout(). 6890075Sobrien void add(const SeqT &Seq) { 69132718Skan assert(Entries == 0 && "Cannot call add() after layout()"); 7090075Sobrien typename SeqMap::iterator I = Seqs.lower_bound(Seq); 7190075Sobrien 72132718Skan // If SeqMap contains a sequence that has Seq as a suffix, I will be 7390075Sobrien // pointing to it. 74132718Skan if (I != Seqs.end() && isSuffix(Seq, I->first)) 7590075Sobrien return; 76132718Skan 7790075Sobrien I = Seqs.insert(I, std::make_pair(Seq, 0u)); 7890075Sobrien 7990075Sobrien // The entry before I may be a suffix of Seq that can now be erased. 8090075Sobrien if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) 81132718Skan Seqs.erase(I); 8290075Sobrien } 83132718Skan 8490075Sobrien bool empty() const { return Seqs.empty(); } 8590075Sobrien 8690075Sobrien /// layout - Computes the final table layout. 8790075Sobrien void layout() { 8890075Sobrien assert(Entries == 0 && "Can only call layout() once"); 89117395Skan // Lay out the table in Seqs iteration order. 90117395Skan for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E; 91117395Skan ++I) { 92132718Skan I->second = Entries; 93117395Skan // Include space for a terminator. 94132718Skan Entries += I->first.size() + 1; 95132718Skan } 96169689Skan } 97132718Skan 98117395Skan /// get - Returns the offset of Seq in the final table. 99132718Skan unsigned get(const SeqT &Seq) const { 100117395Skan assert(Entries && "Call layout() before get()"); 101169689Skan typename SeqMap::const_iterator I = Seqs.lower_bound(Seq); 102117395Skan assert(I != Seqs.end() && isSuffix(Seq, I->first) && 103132718Skan "get() called with sequence that wasn't added first"); 104169689Skan return I->second + (I->first.size() - Seq.size()); 105169689Skan } 106169689Skan 107169689Skan /// emit - Print out the table as the body of an array initializer. 108169689Skan /// Use the Print function to print elements. 109132718Skan void emit(raw_ostream &OS, 110132718Skan void (*Print)(raw_ostream&, ElemT), 111117395Skan const char *Term = "0") const { 112117395Skan assert(Entries && "Call layout() before emit()"); 113132718Skan for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end(); 114132718Skan I != E; ++I) { 115132718Skan OS << " /* " << I->second << " */ "; 116117395Skan for (typename SeqT::const_iterator SI = I->first.begin(), 117117395Skan SE = I->first.end(); SI != SE; ++SI) { 118169689Skan Print(OS, *SI); 119169689Skan OS << ", "; 120169689Skan } 121169689Skan OS << Term << ",\n"; 122169689Skan } 123169689Skan } 124117395Skan}; 125169689Skan 126169689Skan// Helper function for SequenceToOffsetTable<string>. 127169689Skanstatic inline void printChar(raw_ostream &OS, char C) { 128169689Skan unsigned char UC(C); 129169689Skan if (isalnum(UC) || ispunct(UC)) { 130169689Skan OS << '\''; 131117395Skan if (C == '\\' || C == '\'') 132117395Skan OS << '\\'; 133169689Skan OS << C << '\''; 134169689Skan } else { 135117395Skan OS << unsigned(UC); 136169689Skan } 137132718Skan} 138132718Skan 139117395Skan} // end namespace llvm 140169689Skan 141117395Skan#endif 142117395Skan