See also Sed Command and Grep Command
Awk is a very powerful line-oriented filter and small programming language available on all Unix-style operating systems. This page contains small tutorial awk scripts and snippets.
<!----------------------------------------------------------------------------->
Items in the BEGIN block run before any lines are processed, so we set the standard FS (field separator) variable to a colon (:) and set a maxuid variable to 0. Then, for each line, if the 3rd column (the UID in /etc/passwd) is greater than maxuid, we store it as the new maxuid and set the maxname variable to that line's username, which is column 1. Items in the END block run after all lines are processed.
maxuid.awk
#!/usr/bin/awk -f
# maxuid.awk - report the entry with the highest value in the 3rd ":"-separated
# column, together with its 1st column.
# Usage: ./maxuid.awk /etc/passwd    (or: awk -f maxuid.awk /etc/passwd)
#
# The shebang must be on a line of its own; the rules below start on new lines
# so that awk actually sees them as program text.

# Runs once before any input: split fields on ":" and start the maximum at 0.
BEGIN { FS = ":"; maxuid = 0 }

# For each line whose 3rd field exceeds the current maximum, remember that
# value and the line's 1st field.
$3 > maxuid { maxuid = $3; maxname = $1 }

# Runs once after all input: print "name: value" for the maximum found.
END { print maxname ": " maxuid }
Since we added the awk shebang to the file, we can run it directly with ./maxuid.awk /etc/passwd (after making it executable with chmod +x maxuid.awk). If we hadn't added the shebang, we could run it through awk instead: awk -f maxuid.awk /etc/passwd
Awk usually works on one line at a time, but what if we have data like this:
/tmp/test.txt
Michael Jackson 555-5551 Kevin Jones 555-5552
To display
Michael 555-5551 Kevin 555-5552
ps -ef shows processes and their accumulated CPU time (the TIME column) in hh:mm:ss format. Let's print just the second column (the PID) and the time, with the time converted into seconds.
seconds.awk
# seconds.awk - print each PID with its TIME column converted to seconds.
# Usage: ps -ef | awk -f seconds.awk
#
# Assumes field 2 is the PID and field 7 is the TIME column of `ps -ef`
# - confirm the column layout on your platform.

# Only act on rows whose 2nd field is a number. This skips the header row,
# which would otherwise be printed as a meaningless "0 0" line.
$2 ~ /^[0-9]+$/ {
    days = 0
    clock = $7

    # Some ps implementations show long times as DD-hh:mm:ss. Peel off the day
    # count first; otherwise "1-02" * 3600 would silently drop the days.
    dash = index(clock, "-")
    if (dash > 0) {
        days = substr(clock, 1, dash - 1)
        clock = substr(clock, dash + 1)
    }

    # hms[1] = hours, hms[2] = minutes, hms[3] = seconds.
    split(clock, hms, ":")
    secs = (days * 86400) + (hms[1] * 3600) + (hms[2] * 60) + hms[3]

    printf "%6d %5d\n", $2, secs
}
Notice that count is not a built-in function; it's an array.
totalmem.awk
# totalmem.awk - per-key tally: how many rows each first-column value has and
# the sum of its 6th column.

# Drop the header row (the one whose first column is the literal "USER").
$1 == "USER" { next }

# Every remaining row: count[] and tot[] are arrays keyed by the first column.
{
    count[$1] += 1
    tot[$1] = tot[$1] + $6
}

# After all input, emit one line per key: name, row count, column-6 total.
END {
    for (who in tot) {
        printf "%8s: %4d %8d\n", who, count[who], tot[who]
    }
}
An awk script for HAProxy logs: it breaks each log line out into columns for date, host, status, speed, size, and page.
#!/usr/bin/awk -f
# Break HAProxy log lines out into columns: time, subdomain, status,
# termination flags, size (KB), one timer value, and the requested page.
#
# Output is fixed-width by default; run with `-v csv=1` for quoted CSV.
# NOTE(review): the field numbers below presumably match an HAProxy HTTP log
# line behind a syslog prefix with one captured request header - confirm
# against your own log format.

BEGIN {
    FS = " "
    # Output as CSV: honour a value passed with -v, otherwise default to off.
    if (csv == "")
        csv = 0
}

{
    client = $6
    date = $7
    # Keep 12 characters of the date field starting at position 14 as the time.
    time = substr(date, 14, 12)
    backend = $9
    split($10, timers, "/")
    # NOTE(review): this takes the 4th "/"-separated timer; the name suggests
    # total time - confirm which timer is intended.
    tt = timers[4]
    status = $11
    size = $12 / 1024    # in kb
    termination = $15
    split($16, conns, "/")
    # Drop the first and last character of field 18 (its enclosing delimiters).
    host = substr($18, 2, length($18) - 2)
    # Subdomain is the text before the first "."; a host without any "." is
    # used whole instead of collapsing to an empty string.
    dot = index(host, ".")
    subdomain = (dot > 0) ? substr(host, 1, dot - 1) : host
    request = $20
    page = getPage(request)

    #out(date, 26)
    out(time, 12)
    #out(substr(host, 1, 30), 30)
    # Fixed-width output truncates the subdomain to its column; CSV keeps it whole.
    if (csv == 1)
        out(subdomain, 20)
    else
        out(substr(subdomain, 1, 20), 20)
    out(status, 3)
    #out(client, 21)
    out("[" termination "]", 4)
    out(size, 6.1, "f")
    out(tt, 5, "d")
    out(page)
    printf("\n")
}

# Print one column.
#   data - value to print
#   pad  - printf width/precision (e.g. 12 or 6.1); may be omitted
#   type - printf conversion letter; defaults to "s"
# In CSV mode pad and type are ignored and the value is written as "value",
# (each value is followed by a comma, so rows end with a trailing comma).
function out(data, pad, type) {
    if (type == "")
        type = "s"
    if (csv == 1) {
        # Double any embedded quotes so the value stays a single CSV field.
        gsub(/"/, "\"\"", data)
        printf("%s", "\"" data "\",")
    } else {
        # Left-justified, followed by one space as the column separator.
        printf("%-" pad type " ", data)
    }
}

# Return the request with any query string removed: everything before the
# first "?", or the whole request when there is none.
# qpos is a local variable (extra parameter, awk's idiom for locals).
function getPage(request,    qpos) {
    qpos = index(request, "?")
    if (qpos > 0)
        return substr(request, 1, qpos - 1)
    return request
}
#!/usr/bin/gawk -f
# Summarise a log whose lines are quoted, comma-separated fields:
#   "date","user","session","socket","ip","message"
# Prints usage totals followed by error totals.
#
# Needs gawk: users[user]["connected"] relies on gawk's arrays of arrays.
# Message tests use index() because every pattern is a literal substring.

BEGIN {
    # Splitting on "," (quote-comma-quote) leaves one stray quote at the start
    # of the first field and one at the end of the last field.
    FS = "\",\""
}

{
    # Only user and message are used below; the other fields are named to
    # document the record layout.
    date = substr($1, 2, length($1) - 1)        # drop the leading quote
    user = $2
    session = $3
    socket = $4
    ip = $5
    message = substr($6, 1, length($6) - 1)     # drop the trailing quote
    lines++

    if (index(message, "Connected using")) {
        # User connected
        users[user]["connected"] = 1
        if (index(message, "websocket transport")) {
            users[user]["transport"] = "websocket"
        } else {
            users[user]["transport"] = "xhr"
        }
    } else if (index(message, "Disconnected")) {
        # User disconnected
        users[user]["connected"] = 0
    } else if (index(message, "Sending message")) {
        sentMessages++
        recipients[user] = 1
    } else if (index(message, "invalid username or password")) {
        unauthorized++
    } else if (index($0, "socket.io started")) {
        # Tested against the whole line rather than the message field.
        restarts++
    } else if (index(message, "Error:")) {
        errors["errors"]++
        if (index(message, "data returned from SSO query not valid JSON")) {
            errors["sso-not-json"]++
        } else if (index(message, "could not http")) {
            errors["dns"]++
        } else if (index(message, "EMFILE")) {
            errors["emfile"]++
        } else if (index(message, "ENOTFOUND")) {
            errors["enotfound"]++
        }
    }
}

END {
    # users is now a distinct associative array of information:
    # one entry per user holding the last connection state seen.
    for (u in users) {
        if (users[u]["connected"] == 1) {
            connectedUsers++
            if (users[u]["transport"] == "websocket") {
                websocketUsers++
            } else {
                xhrUsers++
            }
        } else {
            disconnectedUsers++
        }
    }

    print("DynaComm Log Information");
    print("------------------------");
    printf(" Sent Messages: %d\n", sentMessages);
    printf("Distinct Recipients: %d\n", length(recipients));
    printf(" Connected Users: %d\n", connectedUsers);
    printf(" Websocket Users: %d\n", websocketUsers);
    printf(" Xhr-Polling Users: %d\n", xhrUsers);
    printf(" Disconnected Users: %d\n", disconnectedUsers);
    printf(" Unauthorized: %d\n", unauthorized);
    printf(" Log Lines: %d\n", lines);

    print("\n\nDynacomm Log Issues");
    print("------------------------");
    printf(" Total: %d\n", errors["errors"]);
    printf(" SSO Not JSON: %d\n", errors["sso-not-json"]);
    printf(" SSO DNS: %d\n", errors["dns"]);
    printf(" EMFILE: %d\n", errors["emfile"]);
    printf(" ENOTFOUND: %d\n", errors["enotfound"]);
    printf(" Restarts: %d\n", restarts);
}