# awk sorts 3d array


########################################################################
# PLT_report
#
# this is an awk program
# example of this program being invoked:
# awk -f PLT_report what_do="xxx" <datafile>
# where "xxx" = "IN" or "OUT" so as to have the appropriate header
# where <datafile> is the file that has been uploaded from
# the terminal to unix host (here), and had the file LOTTO_INFO
# appended to it.
#
# It will generate a summary report of the daily lottery tickets bought.
# The report "main" break is on 'play_date', the 2nd break is on
# 'lotto_id', with each line having: 'terminal_id', total # of bets &
# total cost & fee (calculated by: using total # of bets).
#
#
# The datafile has two types of records in it:
# 1) the LOTTO_INFO records;
# 2) a "BET" record.
#
# The format for LOTTO_INFO is:
# 1 "LI" (a string constant used as a tag)
# 2 lotto_id (c4)
# 3 bets_per_buy (i3) [size is not important, this is field driven]
# 4 cost_per_buy (i7)
# 5 fee_per_buy (i7)
# 6 lotto_fn (c16) # fn - fullname
# # ---------- this prog does not use the following fields
# 7 selected (i3)
# 8 wins (i3)
# 9 entries (i3)
# 10 max (i3)
# 11 Hours offset (i3)
# 12-18 day flags (c1)
#
# The format for the "BET" record is
# transaction_code - which consist of:
# terminal_id (c5)
# "-"
# yy (year)
# ddd (julian day)
# "-"
# trans_num (c5 - a seq # for this terminal_id
# this day)
# lotto_id (c4)
# play_date (c8 - yy/mm/dd)
# numbers (c2 - seven of 'em, extras zeroed)
# country_code (c4)
# passport (c20)
#
#
# ------------------------------------------------------------------
# awk programming notes:
#
# 1st- this code is deliberately made to look 'C' like - please continue
#
# 2nd- "ARRAYS": awk can only handle 1-dimensional arrays. Subscripts
# can be any non-NULL value, including non-numeric strings
# (eg: 5 "7" "ngc" "rci" "3em" "b42"), or a variable set equal to a
# non-NULL value. (the following are equivalent: ray["b3c"] and
# ray[s2] where s2="b3c".)
# note: an unquoted b3c is a variable, NOT the string "b3c";
# subscripts are case sensitive: "xyz" != "XYZ".
#
# Arrays are not formally declared, they "spring" into existence
# by being referenced. In fact, a given cell does not exist
# until it is referenced.
#
# It is possible to do pseudo-multi-dimension arrays by using
# concatenation. Terminal IDs are unique, so are lottery IDs and
# play dates. So a subscript that is the concatenation of these
# three fields is also unique.
#
# -----------------------------------------------------------------------
#
# this program is going to build a summary array subscripted by the
# concatenation of play_date lotto_id terminal_id, which holds the
# total # of bets. It will also build an array that contains a list
# of the "subscripts" used. This list will be sorted. When the list
# is walked, the summary records will be printed in sorted order.
#
# -----------------------------------------------------------------------
#
# 90.1127 - awc - created program
# 90.1128 - awc - merged in the code to build cut-files
# - problem: was skipping next line after doing a total
# solution: reorder the break control IFs
# 90.1129 - awc - added in code to handle FEEs
# 90.1205 - awc - added if so that blank input records are ignored
# - accepts input from the command line of "IN" or "OUT"
# 90.1206 - awc - accepts the terminal id from the command line for
# "IN" processing
# 90.1211 - awc - changed code to reflect changes made to LOTTO_INFO
# the "..." after the Lottery full name are now a separate
# field, and the name length field is gone
# - changed the blank line if to also see if this line had
# a "#" as the first char so that LOTTO_INFO can have
# comments in it
# 90.1217 - awc - changes in LOTTO_INFO's format, affects comments only
# 90.1219 - awc - changed "LOTTO_INFO" to "LI"
#
########################################################################

# initialize variables

# note: P for play_date, L for lotto_id, T for terminal_id

# Set up every global the program uses before the first record is read.
# awk would create all of these on first reference; they are listed here
# so that each one's role is documented in a single place.
BEGIN {
    # what_do / what_term come from the command line.  An operand
    # assignment (what_do="IN" before the data file) is applied after BEGIN
    # and always wins.  A -v assignment is applied BEFORE BEGIN, so only
    # fall back to the default when nothing was supplied - otherwise the
    # -v value would be silently thrown away.
    if (what_do == "")
        what_do = " ";       # default: anything other than "IN"
    # what_term is deliberately not assigned: unset reads as "" and a
    # value passed with -v survives.

    TRUE = 1;                # used with boolean flags
    FALSE = 0;               # used with boolean flags

    # NOTE(review): both names say "11" (columns), but the literals as they
    # stand here are shorter - confirm the intended padding.
    SP11 = " ";              # 11 spaces - used in printing
    TO11 = " Total: ";       # 11 spaces - used in printing

    flipflag = TRUE;         # a flag used in the sort

    i = 0;                   # used as a subscript to walk PLT_list
    j = 0;                   # used as a subscript when sorting PLT_list

    hold = "";               # used while sorting

    num_PLT = 0;             # the total number of entries in PLT_list

    cur_T = "";              # convenient
    cur_L = "";              # used as a break check, holds lotto_id
    cur_P = "";              # used as a break check, holds play_date
    cur_PLT = "";            # used as a subscript, holds P L & terminal_id
    sub_PLT = "";            # used as a subscript during building

    PLT_list[1] = "";        # is an array holding all the combinations of P L T

    PLT_count[1] = 0;        # is an array holding # of bets

    bets = 1;                # used as a subscript
    cost = 2;                # used as a subscript
    fees = 3;                # used as a subscript

    PL_total[bets] = 0;      # holds the total # of bets for this P L comb
    PL_total[cost] = 0;      # holds the total costs for this P L comb
    PL_total[fees] = 0;      # holds the total fees for this P L comb

    P_total[bets] = 0;       # holds the total # of bets for this play_date
    P_total[cost] = 0;       # holds the total costs for this play_date
    P_total[fees] = 0;       # holds the total fees for this play_date

    grand_total[bets] = 0;   # holds the total # of bets for this report
    grand_total[cost] = 0;   # holds the total costs for this report
    grand_total[fees] = 0;   # holds the total fees for this report

    bet_cnt = 0;             # convenient temp hold of # of bets for a P L T
    bet_str = "";            # bet_cnt with leading spaces if any are needed

    amount = 0;              # holds the cost calc-ed for a given P L T
    amt_str = "";            # amount with leading spaces if any are needed

    fee = 0;                 # holds the FEE calc-ed for a given P L T
    fee_str = "";            # fee with leading spaces if any are needed

    bet_test = 0;            # the modulo of bet_cnt & bets_per_buy
    bet_fix = 0;             # bets needed to reach the next multiple of bets_per_buy

    bets_per_buy[1] = 0;     # is an array, 1 rec for each lotto_id
    cost_per_buy[1] = 0;     # is an array, 1 rec for each lotto_id
    fee_per_buy[1] = 0;      # is an array, 1 rec for each lotto_id

    lotto_list[1] = "";      # is an array holding a list of the lottery IDs
    lotto_fn[1] = "";        # is an array holding the lotto's fullname
    dot_fill[1] = 0;         # a string of dots for filler

    lotto_name = "";         # holds the current lotto_fn[cur_L]
    lotto_name_fill = "";    # holds lotto_name & dot_fill concatenated

    num_L = 0;               # the total number of entries in lotto_list
}

#
# the Body of the program, the following (to the END) will be executed
# for each record in the input data file.
#
# select the records that have "LI"
# to build the lotto_list & bets/cost_per_buy arrays.
# the records should be formatted as:
# "LI" string constant used to ID this rec
# lotto_id
# num_picks
# cost_picks

# LOTTO_INFO ("LI") record: field 2 is the lotto_id, fields 3-5 are the
# bets / cost / fee per buy and field 6 is the lottery's full name.
$1 == "LI" {
    # A zero (or never-set) bets_per_buy means this lotto_id has not been
    # registered yet - which should be the case for every LI record.
    if (!bets_per_buy[$2])
    {
        lotto_list[++num_L] = $2;          # remember the lotto_id
        lotto_fn[$2] = $6;                 # and its full name

        # pad with dots so that name + filler is 16 characters wide
        dot_fill[$2] = "";
        while (length(lotto_fn[$2] dot_fill[$2]) < 16)
            dot_fill[$2] = dot_fill[$2] ".";
    }

    # adding 0 forces each field to be stored as a number
    bets_per_buy[$2] = 0 + $3;
    cost_per_buy[$2] = 0 + $4;
    fee_per_buy[$2] = 0 + $5;
}


# select the records that do NOT have "LI"
# to build the PLT_count array & PLT_list
# the records should be formatted as:
# transaction_number
# lotto_id
# play_date
# (don't care about the rest of the record)
#
# also append record to the appropriate cut-file


# "BET" record: anything that is not an LI record, not blank and not a
# '#' comment line.  Tally it under its play_date / lotto_id / terminal_id.
$1 != "LI" && NF > 0 && substr($1, 1, 1) != "#" {
    # the P L T subscript: play_date ($3), lotto_id ($2) and the first five
    # characters of the transaction code ($1), which hold the terminal_id
    sub_PLT = $3 $2 substr($1, 1, 5);

    # The post-increment tallies this bet; a previous tally of zero means
    # the combination is new and must also be added to the list.
    if (PLT_count[sub_PLT]++ == 0)
        PLT_list[++num_PLT] = sub_PLT;
}

# to display every input record
# { print $0 ">"$1"< {"NF"}" }
# <<< just for testing

#
# the END section- where the report is generated
#

# Report generation.  Sorts the list of play_date/lotto_id/terminal_id
# subscripts, then walks it printing one line per terminal, a total line
# whenever the lotto_id changes, a second total whenever the play_date
# changes, and the grand total at the very end.
#
# Each subscript is sliced by fixed position: characters 1-8 are the
# play_date, 9-12 the lotto_id and 13-17 the terminal_id.
END {
    flipflag = TRUE;                        # sort the list of P L T comb
    i = 1;
    # Bubble sort.  'i' never changes; the (i < num_PLT) test only skips
    # the sort when there are fewer than two entries.  The loop ends when
    # a full pass is made without a swap (flipflag stays FALSE).
    while ((i < num_PLT) && (flipflag == TRUE))
    {
        flipflag = FALSE;
        for (j=1; j < num_PLT; j++)
            if (PLT_list[j] > PLT_list[j+1])
            {
                flipflag = TRUE;
                hold = PLT_list[j];
                PLT_list[j] = PLT_list[j+1];
                PLT_list[j+1] = hold;
            }
    }

    cur_P = substr(PLT_list[1], 1, 8);      # init the break checks
    cur_L = substr(PLT_list[1], 9, 4);
    lotto_name = lotto_fn[cur_L];
    lotto_name_fill = (lotto_name dot_fill[cur_L]);

    # print headers
    print "SUMMARY REPORT by Play Date / Lottery / Terminal ID";

    if (what_do == "IN")
        print " For Data Coming in from Terminal " what_term;
    else
        print " For Lottery Cards being printed";

    print " ";
    print " for Play Date of " cur_P;
    print " for Lottery of " lotto_name;
    print " Terminal # of Bets Cost Fee";
    # 1234 12345 $12345.67 $12345.67
    #123456789-123456789-123456789-123456789-123456789-123456789


    for (i=1; i <= num_PLT; i++)            # walk the PLT_list
    {
        # The play_date check must come before the lotto_id check: it
        # resets cur_L itself, so the lotto_id break below does not fire
        # a second time for the same entry.
        if (cur_P != substr(PLT_list[i], 1, 8))   # see if play date changed
        {                                   # if changed - do a total (2)
            bet_str = sprintf("%6d", PL_total[bets]);
            amt_str = sprintf("%8.2f", PL_total[cost]);
            fee_str = sprintf("%8.2f", PL_total[fees]);

            print " ";                      # 1st a total line for lotto_id
            print " " lotto_name_fill bet_str " $" amt_str " $" fee_str;
            print " ";

            P_total[bets] += PL_total[bets];   # finish building the next total
            P_total[cost] += PL_total[cost];
            P_total[fees] += PL_total[fees];

            PL_total[bets] = 0;             # zero the previous total
            PL_total[cost] = 0;
            PL_total[fees] = 0;


            bet_str = sprintf("%6d", P_total[bets]);
            amt_str = sprintf("%8.2f", P_total[cost]);
            fee_str = sprintf("%8.2f", P_total[fees]);

            print " ";                      # 2nd a total line for play_date
            print " " cur_P TO11 bet_str " $" amt_str " $" fee_str;
            print " ";
            print " ";

            cur_P = substr(PLT_list[i], 1, 8);   # reset the break checks
            cur_L = substr(PLT_list[i], 9, 4);

            print " for Play Date of " cur_P;    # print play_date headers
            lotto_name = lotto_fn[cur_L];
            lotto_name_fill = (lotto_name dot_fill[cur_L]);
            print " for Lottery of " lotto_name;
            print " Terminal # of Bets Cost Fee";

            grand_total[bets] += P_total[bets];  # build the next break's totals
            grand_total[cost] += P_total[cost];
            grand_total[fees] += P_total[fees];

            P_total[bets] = 0;              # zero the current totals
            P_total[cost] = 0;
            P_total[fees] = 0;
        }

        if (cur_L != substr(PLT_list[i], 9, 4))   # see if lotto_id changed
        {                                   # if changed - do a total (1)
            bet_str = sprintf("%6d", PL_total[bets]);
            amt_str = sprintf("%8.2f", PL_total[cost]);
            fee_str = sprintf("%8.2f", PL_total[fees]);

            print " ";
            print " " lotto_name_fill bet_str " $" amt_str " $" fee_str;
            print " ";
            print " ";

            cur_L = substr(PLT_list[i], 9, 4);   # reset the break check

            # print lotto_id header
            lotto_name = lotto_fn[cur_L];
            lotto_name_fill = (lotto_name dot_fill[cur_L]);
            print " for Lottery of " lotto_name;

            P_total[bets] += PL_total[bets];     # build the next break's totals
            P_total[cost] += PL_total[cost];
            P_total[fees] += PL_total[fees];

            PL_total[bets] = 0;             # zero current break's totals
            PL_total[cost] = 0;
            PL_total[fees] = 0;
        }

        # print terminal_id line

        cur_T = substr(PLT_list[i], 13, 5);

        bet_cnt = PLT_count[PLT_list[i]];

        if (bets_per_buy[cur_L] + 0 == 0)
        {
            # No usable LI record for this lottery (missing, or its
            # bets_per_buy is zero).  The modulo and divisions below would
            # be a division by zero, so report it and price the line at zero.
            print " ";
            print " ERROR - no bets_per_buy on file for this lottery ";
            print " Play Date: " cur_P;
            print " Lottery ID: " cur_L;
            print " Terminal ID: " cur_T;
            print " number of bets is " bet_cnt;
            print " cost and fee can not be calculated, shown as zero";
            print " ";
            amount = 0;
            fee = 0;
        }
        else
        {
            bet_test = bet_cnt % bets_per_buy[cur_L];   # make sure the bet count
            if (bet_test != 0)                          # is a proper number
            {
                bet_fix = bets_per_buy[cur_L] - bet_test;
                print " ";
                print " ERROR - improper number of bets ";
                print " Play Date: " cur_P;
                print " Lottery ID: " cur_L;
                print " Terminal ID: " cur_T;
                print " number of bets is " bet_cnt;
                print " this is not evenly divable by " bets_per_buy[cur_L];
                print " there is " bet_fix " missing";
                print " will calulate as though they existed";
                print " ";
                bet_cnt += bet_fix;         # round bet_cnt UP
            }

            # number of "buys" times the per-buy cost / fee
            amount = (bet_cnt / bets_per_buy[cur_L]) * cost_per_buy[cur_L];
            fee = (bet_cnt / bets_per_buy[cur_L]) * fee_per_buy[cur_L];
        }

        # build & print the "terminal" total lines
        bet_str = sprintf("%5d", bet_cnt);
        amt_str = sprintf("%8.2f", amount);
        fee_str = sprintf("%8.2f", fee);

        print SP11 cur_T " " bet_str " $" amt_str " $" fee_str;

        PL_total[bets] += bet_cnt;          # build the next break's totals
        PL_total[cost] += amount;
        PL_total[fees] += fee;

    } # endof for i=1; i <= num_PLT; i++

    # do final total lines (3)

    bet_str = sprintf("%6d", PL_total[bets]);
    amt_str = sprintf("%8.2f", PL_total[cost]);
    fee_str = sprintf("%8.2f", PL_total[fees]);

    print " ";                              # 1st a total line for lotto_id
    print " " lotto_name_fill bet_str " $" amt_str " $" fee_str;
    print " ";

    P_total[bets] += PL_total[bets];        # finish building the next total
    P_total[cost] += PL_total[cost];
    P_total[fees] += PL_total[fees];


    bet_str = sprintf("%6d", P_total[bets]);
    amt_str = sprintf("%8.2f", P_total[cost]);
    fee_str = sprintf("%8.2f", P_total[fees]);

    print " ";                              # 2nd a total line for play_date
    print " " cur_P TO11 bet_str " $" amt_str " $" fee_str;
    print " ";

    grand_total[bets] += P_total[bets];     # finish building the next totals
    grand_total[cost] += P_total[cost];
    grand_total[fees] += P_total[fees];


    bet_str = sprintf("%6d", grand_total[bets]);
    amt_str = sprintf("%8.2f", grand_total[cost]);
    fee_str = sprintf("%8.2f", grand_total[fees]);

    print " ";                              # 3rd a total line for GRAND
    print " ";
    print " GRAND TOTAL: " bet_str " $" amt_str " $" fee_str;
}

# end of program