1/******************************************************************** 2 * * 3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 7 * * 8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * 9 * by the Xiph.Org Foundation http://www.xiph.org/ * 10 * * 11 ******************************************************************** 12 13 function: utility main for training codebooks 14 last mod: $Id: train.c 16037 2009-05-26 21:10:58Z xiphmont $ 15 16 ********************************************************************/ 17 18#include <stdlib.h> 19#include <stdio.h> 20#include <math.h> 21#include <string.h> 22#include <errno.h> 23#include <signal.h> 24#include "vqgen.h" 25#include "vqext.h" 26#include "bookutil.h" 27 28static char *rline(FILE *in,FILE *out,int pass){ 29 while(1){ 30 char *line=get_line(in); 31 if(line && line[0]=='#'){ 32 if(pass)fprintf(out,"%s\n",line); 33 }else{ 34 return(line); 35 } 36 } 37} 38 39/* command line: 40 trainvq vqfile [options] trainfile [trainfile] 41 42 options: -params entries,dim,quant 43 -subvector start[,num] 44 -error desired_error 45 -iterations iterations 46*/ 47 48static void usage(void){ 49 fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n" 50 "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n" 51 "options: -p[arams] <entries,dim,quant>\n" 52 " -s[ubvector] <start[,num]>\n" 53 " -e[rror] <desired_error>\n" 54 " -i[terations] <maxiterations>\n" 55 " -d[istance] quantization mesh spacing for density limitation\n" 56 " -b <dummy> eliminate cell size biasing; use normal LBG\n\n" 57 " -c <dummy> Use centroid (not median) midpoints\n" 58 59 "examples:\n" 60 " train a new codebook to 1%% tolerance on datafile 'foo':\n" 61 " xxxvqtrain book -p 256,6,8 -e .01 foo\n" 62 " (produces a trained set in book-0.vqi)\n\n" 63 " continue training 'book-0.vqi' (produces book-1.vqi):\n" 64 " xxxvqtrain book-0.vqi\n\n" 65 " add subvector from element 1 to <dimension> from files\n" 66 " data*.m to the training in progress, prodicing book-1.vqi:\n" 67 " xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype); 68} 69 70int exiting=0; 71void setexit(int dummy){ 72 fprintf(stderr,"\nexiting... please wait to finish this iteration\n"); 73 exiting=1; 74} 75 76int main(int argc,char *argv[]){ 77 vqgen v; 78 79 int entries=-1,dim=-1; 80 int start=0,num=-1; 81 float desired=.05f,mindist=0.f; 82 int iter=1000; 83 int biasp=1; 84 int centroid=0; 85 86 FILE *out=NULL; 87 char *line; 88 long i,j,k; 89 int init=0; 90 q.quant=-1; 91 92 argv++; 93 if(!*argv){ 94 usage(); 95 exit(0); 96 } 97 98 /* get the book name, a preexisting book to continue training */ 99 { 100 FILE *in=NULL; 101 char *filename=alloca(strlen(*argv)+30),*ptr; 102 103 strcpy(filename,*argv); 104 in=fopen(filename,"r"); 105 ptr=strrchr(filename,'-'); 106 if(ptr){ 107 int num; 108 ptr++; 109 num=atoi(ptr); 110 sprintf(ptr,"%d.vqi",num+1); 111 }else 112 strcat(filename,"-0.vqi"); 113 114 out=fopen(filename,"w"); 115 if(out==NULL){ 116 fprintf(stderr,"Unable to open %s for writing\n",filename); 117 exit(1); 118 } 119 120 if(in){ 121 /* we wish to suck in a preexisting book and continue to train it */ 122 float a; 123 124 line=rline(in,out,1); 125 if(strcmp(line,vqext_booktype)){ 126 fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype); 127 exit(1); 128 } 129 130 line=rline(in,out,1); 131 if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){ 132 fprintf(stderr,"Syntax error reading book file\n"); 133 exit(1); 134 } 135 136 vqgen_init(&v,dim,vqext_aux,entries,mindist, 137 vqext_metric,vqext_weight,centroid); 138 init=1; 139 140 /* quant setup */ 141 line=rline(in,out,1); 142 if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta, 143 &q.quant,&q.sequencep)!=4){ 144 fprintf(stderr,"Syntax error reading book file\n"); 145 exit(1); 146 } 147 148 /* quantized entries */ 149 i=0; 150 for(j=0;j<entries;j++){ 151 for(k=0;k<dim;k++){ 152 line=rline(in,out,0); 153 sscanf(line,"%f",&a); 154 v.entrylist[i++]=a; 155 } 156 } 157 vqgen_unquantize(&v,&q); 158 159 /* bias */ 160 i=0; 161 for(j=0;j<entries;j++){ 162 line=rline(in,out,0); 163 sscanf(line,"%f",&a); 164 v.bias[i++]=a; 165 } 166 167 v.seeded=1; 168 { 169 float *b=alloca((dim+vqext_aux)*sizeof(float)); 170 i=0; 171 while(1){ 172 for(k=0;k<dim+vqext_aux;k++){ 173 line=rline(in,out,0); 174 if(!line)break; 175 sscanf(line,"%f",b+k); 176 } 177 if(feof(in))break; 178 vqgen_addpoint(&v,b,b+dim); 179 } 180 } 181 182 fclose(in); 183 } 184 } 185 186 /* get the rest... */ 187 argv=argv++; 188 while(*argv){ 189 if(argv[0][0]=='-'){ 190 /* it's an option */ 191 if(!argv[1]){ 192 fprintf(stderr,"Option %s missing argument.\n",argv[0]); 193 exit(1); 194 } 195 switch(argv[0][1]){ 196 case 'p': 197 if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3) 198 goto syner; 199 break; 200 case 's': 201 if(sscanf(argv[1],"%d,%d",&start,&num)!=2){ 202 num= -1; 203 if(sscanf(argv[1],"%d",&start)!=1) 204 goto syner; 205 } 206 break; 207 case 'e': 208 if(sscanf(argv[1],"%f",&desired)!=1) 209 goto syner; 210 break; 211 case 'd': 212 if(sscanf(argv[1],"%f",&mindist)!=1) 213 goto syner; 214 if(init)v.mindist=mindist; 215 break; 216 case 'i': 217 if(sscanf(argv[1],"%d",&iter)!=1) 218 goto syner; 219 break; 220 case 'b': 221 biasp=0; 222 break; 223 case 'c': 224 centroid=1; 225 break; 226 default: 227 fprintf(stderr,"Unknown option %s\n",argv[0]); 228 exit(1); 229 } 230 argv+=2; 231 }else{ 232 /* it's an input file */ 233 char *file=strdup(*argv++); 234 FILE *in; 235 int cols=-1; 236 237 if(!init){ 238 if(dim==-1 || entries==-1 || q.quant==-1){ 239 fprintf(stderr,"-p required when training a new set\n"); 240 exit(1); 241 } 242 vqgen_init(&v,dim,vqext_aux,entries,mindist, 243 vqext_metric,vqext_weight,centroid); 244 init=1; 245 } 246 247 in=fopen(file,"r"); 248 if(in==NULL){ 249 fprintf(stderr,"Could not open input file %s\n",file); 250 exit(1); 251 } 252 fprintf(out,"# training file entry: %s\n",file); 253 254 while((line=rline(in,out,0))){ 255 if(cols==-1){ 256 char *temp=line; 257 while(*temp==' ')temp++; 258 for(cols=0;*temp;cols++){ 259 while(*temp>32)temp++; 260 while(*temp==' ')temp++; 261 } 262 263 fprintf(stderr,"%d colums per line in file %s\n",cols,file); 264 265 } 266 { 267 int i; 268 float b[cols]; 269 if(start+num*dim>cols){ 270 fprintf(stderr,"ran out of columns reading %s\n",file); 271 exit(1); 272 } 273 while(*line==' ')line++; 274 for(i=0;i<cols;i++){ 275 276 /* static length buffer bug workaround */ 277 char *temp=line; 278 char old; 279 while(*temp>32)temp++; 280 281 old=temp[0]; 282 temp[0]='\0'; 283 b[i]=atof(line); 284 temp[0]=old; 285 286 while(*line>32)line++; 287 while(*line==' ')line++; 288 } 289 if(num<=0)num=(cols-start)/dim; 290 for(i=0;i<num;i++) 291 vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num); 292 293 } 294 } 295 fclose(in); 296 } 297 } 298 299 if(!init){ 300 fprintf(stderr,"No input files!\n"); 301 exit(1); 302 } 303 304 vqext_preprocess(&v); 305 306 /* train the book */ 307 signal(SIGTERM,setexit); 308 signal(SIGINT,setexit); 309 310 for(i=0;i<iter && !exiting;i++){ 311 float result; 312 if(i!=0){ 313 vqgen_unquantize(&v,&q); 314 vqgen_cellmetric(&v); 315 } 316 result=vqgen_iterate(&v,biasp); 317 vqext_quantize(&v,&q); 318 if(result<desired)break; 319 } 320 321 /* save the book */ 322 323 fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n"); 324 fprintf(out,"%s\n",vqext_booktype); 325 fprintf(out,"%d %d %d\n",entries,dim,vqext_aux); 326 fprintf(out,"%ld %ld %d %d\n", 327 q.min,q.delta,q.quant,q.sequencep); 328 329 /* quantized entries */ 330 fprintf(out,"# quantized entries---\n"); 331 i=0; 332 for(j=0;j<entries;j++) 333 for(k=0;k<dim;k++) 334 fprintf(out,"%d\n",(int)(rint(v.entrylist[i++]))); 335 336 fprintf(out,"# biases---\n"); 337 i=0; 338 for(j=0;j<entries;j++) 339 fprintf(out,"%f\n",v.bias[i++]); 340 341 /* we may have done the density limiting mesh trick; refetch the 342 training points from the temp file */ 343 344 rewind(v.asciipoints); 345 fprintf(out,"# points---\n"); 346 { 347 /* sloppy, no error handling */ 348 long bytes; 349 char buff[4096]; 350 while((bytes=fread(buff,1,4096,v.asciipoints))) 351 while(bytes)bytes-=fwrite(buff,1,bytes,out); 352 } 353 354 fclose(out); 355 fclose(v.asciipoints); 356 357 vqgen_unquantize(&v,&q); 358 vqgen_cellmetric(&v); 359 exit(0); 360 361 syner: 362 fprintf(stderr,"Syntax error in argument '%s'\n",*argv); 363 exit(1); 364} 365