realspirituals - 5 months ago 14

Bash Question

I have an input file named

`input`

`child, parent, val`

1 , 0 , a

2 , 1 , b

3 , 1 , c

4 , 2 , d

5 , 2 , e

I need to store them in an array named

`data_array`

`BEGIN {`

# Reads the file "input" line by line and tries to record, for each child id,
# its parent id and its value in data_array.
# NOTE(review): getline returns -1 on a read error, which this loop condition
# treats as true; the usual guard is (getline < "input") > 0.
# NOTE(review): nothing skips the "child, parent, val" header line, so it is
# processed like a data record.

while (getline < "input")

{

# Split the record on bare commas; the spaces around each comma stay attached
# to the resulting pieces.

split($0,ft,",");

child=ft[1];

parent=ft[2];

value=ft[3];

#need help here in assigning two values into the array
# NOTE(review): an awk array element holds a single scalar, so "parent,value"
# on the right-hand side is a syntax error. The two values have to be combined
# into one string (for example parent SUBSEP value) or stored in a gawk
# array of arrays.

data_array[child]=parent,value;

}

close("input");

}

The result_array should hold the parent-to-child relationships, with each parent's children listed in sorted order.

`result_array[parent]="all children separated by comma"`

For example, parent 0 has one child called 1. Parent 1 has two children called 2, and 3.

The order of 2 and 3 is determined by alphabetically sorting their corresponding values.

Sorting those values gives the order shown below, so child 2 (value b) is listed before child 3 (value c):

`b`

`c`

There could be any number of children.

Childless nodes must be written with blank content.

These results must go into the final array in the following format.

`result_array["0"] = "1"`

result_array["1"] = "2,3"

result_array["2"] = "4,5"

result_array["3"] = ""

result_array["4"] = ""

result_array["5"] = ""

Please shout if this is unclear.

@Edmorton Please have a look; it doesn't seem to be a copy-paste mistake!

`cat inputfile`

child, parent, val

1 , 0 , a

2 , 1 , b

3 , 1 , c

4 , 2 , d

5 , 2 , e

cat tst.awk

# Field separator: a comma with optional spaces on either side.
BEGIN { FS=" *, *" }

# Header line: remember the column number of every column name in f[].
NR==1 { for (i=1;i<=NF;i++) f[$i]=i; next }

# NOTE(review): the header shown above names its columns "child", "parent" and
# "val". "par" and "Chl" are never stored in f[], so f["par"] and f["Chl"] are
# empty, and $(empty) is $0 -- the whole record. That is why the printed keys
# and values below are entire input lines instead of ids.
{ parentsChildren2Vals[$(f["par"])][$(f["Chl"])] = $(f["val"]) }

END {

for (parent in parentsChildren2Vals) {

# Visit each parent's children in ascending string order of their values.
PROCINFO["sorted_in"] = "@val_str_asc"

for (child in parentsChildren2Vals[parent]) {

# Append this child to the parent's comma-separated list.
parents2children[parent] = (parent in parents2children ?

parents2children[parent] "," : "") child

# Referencing the element is enough to record the child as a known node.
children[child]

}

}

# Give every child an entry of its own, so nodes without children appear with
# an empty list.
for (child in children) {

parents2children[child]

}

# Print the result ordered by numeric index.
PROCINFO["sorted_in"] = "@ind_num_asc"

for (parent in parents2children) {

printf "parents2children[\"%s\"] = \"%s\"\n", parent, parents2children[parent]

}

}

gawk -f tst.awk inputfile

parents2children[""] = ""

parents2children["1 , 0 , a"] = "1 , 0 , a"

parents2children["2 , 1 , b"] = "2 , 1 , b"

parents2children["3 , 1 , c"] = "3 , 1 , c"

parents2children["4 , 2 , d"] = "4 , 2 , d"

parents2children["5 , 2 , e"] = "5 , 2 , e"

When I run this on a bigger dataset (say 30 records), the results are confusing:

`gawk -f tst.awk input`

parents2children["1"] = ""

parents2children["1, 0, 'a' "] = "1"

parents2children["2"] = ""

parents2children["2, 1, 'b' "] = "2"

parents2children["3"] = ""

parents2children["3, 1, 'c' "] = "3"

parents2children["4"] = ""

parents2children["4, 2, 'd' "] = "4"

parents2children["5"] = ""

parents2children["5, 3, 'e' "] = "5"

parents2children["6"] = ""

parents2children["6, 4, 'f' "] = "6"

parents2children["7"] = ""

parents2children["7, 4, 'g' "] = "7"

parents2children["8"] = ""

parents2children["8, 5, 'h' "] = "8"

parents2children["9"] = ""

parents2children["9, 5, 'i' "] = "9"

parents2children["10"] = ""

parents2children["10, 6, 'j' "] = "10"

parents2children["11"] = ""

parents2children["11, 6, 'k' "] = "11"

parents2children["12"] = ""

parents2children["12, 6, 'l' "] = "12"

parents2children["13"] = ""

parents2children["13, 8, 'm' "] = "13"

parents2children["14"] = ""

parents2children["14, 8, 'n' "] = "14"

parents2children["15"] = ""

parents2children["15, 8, 'o' "] = "15"

parents2children["16"] = ""

parents2children["16, 10, 'p' "] = "16"

parents2children["17"] = ""

parents2children["17, 14, 'q' "] = "17"

parents2children["18"] = ""

parents2children["18, 16, 'r' "] = "18"

parents2children["19"] = ""

parents2children["19, 17, 's' "] = "19"

parents2children["20"] = ""

parents2children["20, 18, 'tc'"] = "20"

parents2children["21"] = ""

parents2children["21, 18, 'tb'"] = "21"

parents2children["22"] = ""

parents2children["22, 18, 'ta'"] = "22"

parents2children["23"] = ""

parents2children["23, 19, 'ub'"] = "23"

parents2children["24"] = ""

parents2children["24, 19, 'ua'"] = "24"

parents2children["25"] = ""

parents2children["25, 19, 'uc'"] = "25"

parents2children["26"] = ""

parents2children["26, 20, 'va'"] = "26"

parents2children["27"] = ""

parents2children["27, 20, 'vc'"] = "27"

parents2children["28"] = ""

parents2children["28, 20, 'vb'"] = "28"

parents2children["29"] = ""

parents2children["29, 24, 'w' "] = "29"

parents2children["30"] = ""

parents2children["30, 27, 'x' "] = "30"

Answer

With GNU awk for true multi-dimensional arrays and sorted_in:

```
$ cat tst.awk
# tst.awk -- build a parent -> "child,child,..." map from comma-separated input
# whose header line names the columns child, parent and val (in any order).
# Requires GNU awk: uses arrays of arrays and PROCINFO["sorted_in"].
#
# Output: one line per node, ordered by numeric id, of the form
#   parents2children["<id>"] = "<children>"
# Children are ordered by their val compared as strings; nodes without
# children are printed with an empty list.

# Field separator: a comma with optional spaces on either side.
BEGIN { FS = " *, *" }

# Trim surrounding whitespace (including the CR of CRLF files) so header names
# and field values compare cleanly. Changing $0 makes awk re-split the record.
{ gsub(/^[[:space:]]+|[[:space:]]+$/, "") }

# Ignore blank lines; they would otherwise create a bogus "" node.
$0 == "" { next }

# The first non-blank line is the header: map column name -> field number.
!haveHeader {
    for (i = 1; i <= NF; i++) f[$i] = i
    haveHeader = 1
    # A missing or misspelt column name would make $(f[name]) silently mean $0
    # (the whole record), so fail loudly instead.
    if (!(("child" in f) && ("parent" in f) && ("val" in f))) {
        print "tst.awk: header must name the columns child, parent and val" > "/dev/stderr"
        badHeader = 1
        exit 1
    }
    next
}

# Data line: remember each child's value underneath its parent.
{ parentsChildren2Vals[$(f["parent"])][$(f["child"])] = $(f["val"]) }

END {
    # exit in a main rule still runs END; stop here if the header was bad.
    if (badHeader) exit 1

    for (parent in parentsChildren2Vals) {
        # Visit this parent's children in ascending string order of their values.
        PROCINFO["sorted_in"] = "@val_str_asc"
        for (child in parentsChildren2Vals[parent]) {
            parents2children[parent] = (parent in parents2children ?
                parents2children[parent] "," : "") child
            # Referencing the element records the child as a known node.
            children[child]
        }
    }

    # Give every child its own entry so childless nodes print with "".
    for (child in children) {
        parents2children[child]
    }

    # Print the result ordered by numeric id.
    PROCINFO["sorted_in"] = "@ind_num_asc"
    for (parent in parents2children) {
        printf "parents2children[\"%s\"] = \"%s\"\n", parent, parents2children[parent]
    }
}
$ awk -f tst.awk file
parents2children["0"] = "1"
parents2children["1"] = "2,3"
parents2children["2"] = "4,5"
parents2children["3"] = ""
parents2children["4"] = ""
parents2children["5"] = ""
```