1#! /bin/sh
2# -*- tcl -*- \
3exec tclsh "$0" ${1+"$@"}
4
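# Make CSV data unique with respect to the specified column: of each run
# of consecutive records that share the same value in that column, only
# the first record is kept.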
6
7package require csv
8package require cmdline
9
10# ----------------------------------------------------
11# csvuniq ?-sep sepchar? column file.in|- file.out|-
12#
13# Argument processing and checks.
14
# Default separator character; replaced by the value of -sep if given.
set sepChar ,

set usage "Usage: $argv0 ?-sep sepchar? column file.in|- file.out|-"

# Strip the leading options off argv. cmdline::getopt yields 1 for every
# option it recognizes, 0 once no options are left, and -1 on an error,
# in which case 'val' carries the error message.
while {[set ok [cmdline::getopt argv {sep.arg} opt val]] > 0} {
    #puts stderr "= $opt $val"
    switch -exact -- $opt {
	sep  {set sepChar $val}
    }
}
if {$ok < 0} {
    # Tell the user what was wrong with the options before showing the
    # generic usage line.
    puts stderr $val
    puts stderr $usage
    exit -1
}
if {[llength $argv] != 3} {
    # Exactly three arguments have to remain: column, input and output.
    puts stderr $usage
    exit -1
}
29
# Pick the three positional arguments apart: column index, input file,
# output file. The preceding check has already ensured that argv holds
# exactly three elements.
set uniCol [lindex $argv 0]
set in     [lindex $argv 1]
set out    [lindex $argv 2]

# The arguments are acceptable only if the column is a non-negative
# integer and neither file name is empty; anything else is a usage error.
if {!(
    [string is integer $uniCol] &&
    ($uniCol >= 0)              &&
    [string length $in]         &&
    [string length $out]
)} {
    puts stderr $usage
    exit -1
}
41
# Turn the file arguments into channels. A name of "-" selects the
# corresponding standard channel. For a real file, 'catch' stores either
# the new channel handle (success) or the error message (failure) back
# into the variable, so a failing open is reported as a single line
# instead of terminating the script with a raw Tcl error trace. The exit
# status of 1 matches what an uncaught script error would have produced.
if {![string compare $in -]} {
    set in stdin
} elseif {[catch {open $in r} in]} {
    puts stderr "$argv0: $in"
    exit 1
}
if {![string compare $out -]} {
    set out stdout
} elseif {[catch {open $out w} out]} {
    puts stderr "$argv0: $out"
    exit 1
}
52
53# ----------------------------------------------------
54# Actual processing, uses the following information from the
55# commandline:
56#
57# in      - channel for input
58# out     - channel for output
59# sepChar - separator character
60# uniCol  - column to make unique
61
# prevKey - value of the unique column in the most recently written record
# atStart - true until the first record has been written
set prevKey ""
set atStart 1

while {![eof $in]} {
    # A negative result means no line could be read; go back and let
    # the eof test decide whether to stop.
    if {[gets $in line] >= 0} {
	set record [::csv::split $line $sepChar]
	set key    [lindex $record $uniCol]

	# Write the record if it is the very first one or if its key
	# differs from the key of the record written before it. Records
	# repeating the previous key are dropped.
	if {$atStart || ([string compare $prevKey $key] != 0)} {
	    set atStart 0
	    set prevKey $key
	    puts $out [::csv::join $record $sepChar]
	}
    }
}

exit ; # automatically closes the channels
83