# Download the plain text (non-html) version of this script.
#! /bin/sh
##########################################################################
# Title : urlgetopt - decode "urlencoded" forms data (RFC 1866)
# Author : Heiner Steven <heiner.steven@odn.de>
# Date : 1999-06-27
# Category : CGI, File Conversion
# Requires : -
# SCCS-Id. : @(#) urlgetopt 1.6 05/04/20
##########################################################################
# usage
# urlgetopt [-lF] [-p prefix] [urlencoded-data]
# -l: print fields on a separate line
# -p: prefix for generated environment variable names
# -F: force output of invalid assignments
#
# Description
# Data entered within an HTML FORM is transferred to the HTTP server
# using the "application/x-www-form-urlencoded" encoding type.
# This script decodes "form-urlencoded" data into a form
# suitable to be used within shell scripts.
#
# Example:
# The form-urlencoded string
# name=Heiner+Steven&org=Heiner%27s+SHELLdorado
#
# will be decoded to
# name='Heiner Steven'
# org='Heiner'"'"'s SHELLdorado'
#
# usage example (i.e. from within a CGI script):
#
# eval `urlgetopt -p FORM_ -l "$QUERY_STRING"`
#
# Without "eval":
#
# OIFS=$IFS
# IFS='
# '		# a single newline; sh does not expand "\n" inside quotes
# set -- `urlgetopt -l "$QUERY_STRING"`
# IFS=$OIFS
# echo $# arguments:
# for arg
# do
# echo "<$arg>"
# done
#
# Notes
# o Assumes form-data in the format "name=value", but should
# handle other values as well
# o An apostrophe ' within apostrophes cannot be written as '\'' (in sh,
# bash, ksh88), so we use the work around ' "'" '. Using ksh93,
# we could just use $'\''.
# o If the last character of a value in an assignment is an
# apostrophe ('), the printed assignment will contain two
# superfluous apostrophes ('').
#
# Portability
# Solaris 9 "nawk"
# Linux 2.4 "awk" (= GNU awk) 3.1.0
#
# Changes:
# 2005-04-20 claudio allow values with leading whitespace (thanks to
# Claudio Jolowicz <claudio@jolowicz.com>) (1.6)
# 2002-03-26 heiner Handle a "-n" argument correctly (1.4)
# Thanks to Brian Hiles <bsh@rawbw.com> for
# pointing this out.
##########################################################################
# Name under which the script was invoked, without its directory part;
# printed by usage().
PN=`basename "$0"`
# Version string printed by usage().
VER=1.6
# Set the following variable to shorten startup time. Otherwise the
# directories from the PATH variable will be searched for a suitable AWK
# implementation
#: ${NAWK=nawk} # Set this variable to speed up startup
# usage
# Write the help text (program name $PN, version $VER, option summary and
# an example) to standard error, then terminate the script with status 1.
usage () {
    cat >&2 <<EOF
$PN - decode "urlencoded" CGI form data, $VER
usage: $PN [-lF] [-p prefix] [urlencoded-string]
-l: print fields on a separate line
-p: prefix for generated environment variable names
-F: force output of invalid assignments
Example:
eval \`urlgetopt -l "\$QUERY_STRING"\`
EOF
    exit 1
}
# We use "getopts" instead of "getopt" to preserve whitespace within
# arguments.
# Defaults for the settings controlled by the command line switches.
EvalCheck=true		# -F sets this to "false"
LongFormat=no		# -l sets this to "yes"
VarPrefix=		# -p sets this to its argument

# The leading ":" in the option string selects silent error reporting:
# getopts then stores "?" (unknown option) or ":" (missing argument)
# in $opt instead of printing a message itself.
while getopts :hFlp: opt; do
    case "$opt" in
	p)  VarPrefix=$OPTARG ;;
	l)  LongFormat=yes ;;
	F)  EvalCheck=false ;;
	*)  usage ;;		# -h, unknown option, or missing argument
    esac
done

# Drop the options that were parsed; the remaining arguments are the data.
shift `expr "$OPTIND" - 1`
# We need a new AWK program supporting the "gsub()" function.
# Most AWKs (i.e. "gawk") do support it, but some older NAWKs do not.
# Search for a "gsub" capable NAWK using the current PATH.
##########################################################################
# find_gsub_awk [searchpath]
#
# Search the colon separated directory list "searchpath" (default: $PATH)
# for an executable whose name ends in "awk" and that implements gsub().
# The path of the first match is written to standard output.
#
# An empty first or last entry of the list stands for the current
# directory. Directory names containing whitespace are not supported,
# because the list is split into words by the shell.
#
# Returns 0 if an interpreter was found, 1 otherwise.
# Call it within a command substitution: it sets the helper variables
# "path", "awk" and "result".
##########################################################################
find_gsub_awk () {
    for path in `echo "${1-$PATH}" | sed 's|^:|./ |;s|:$| ./|;s|:| |g'`
    do
	for awk in $path/*awk
	do
	    [ -f "$awk" ] && [ -x "$awk" ] || continue

	    # Ignore AWK scripts like "script.awk". Only the file name is
	    # inspected: a dot in a directory name (e.g. "./" or
	    # "/home/me/.local/bin") must not disqualify the candidate.
	    case `basename "$awk"` in
		*.*) continue;;
	    esac

	    # Now check for the "gsub()" function
	    result=`echo "UU" | "$awk" '{ gsub (/U/, "X"); print }' 2>/dev/null`
	    [ X"$result" = X"XX" ] || continue

	    printf '%s\n' "$awk"	# Found!
	    return 0
	done
    done
    return 1
}

if [ X"$NAWK" = X ]
then
    NAWK=`find_gsub_awk "$PATH"`
    : ${NAWK:=awk}		# nothing suitable found: hope for the best
fi
##########################################################################
# urlgetopt_decode
#
# Filter. Reads "application/x-www-form-urlencoded" data from standard
# input (one record "a=b&c=d&..." per line) and writes the decoded fields
# as shell variable assignments (a='b' c='d') to standard output.
#
# Globals (read only):
#   NAWK        AWK interpreter to run (default: awk)
#   LongFormat  "yes": print each field on a separate line,
#               otherwise separate the fields of a record by a blank
#   VarPrefix   text printed in front of each field
#   EvalCheck   "true": stop with an error at the first field that is
#               not of the form NAME=value with a valid shell identifier
#
# Returns the exit status of the AWK process; the AWK program exits with
# status 1 after printing "invalid assignment: ..." to standard error.
##########################################################################
urlgetopt_decode () {
    # The settings are handed over using "-v" instead of being pasted into
    # the program text, so a quote or backslash in them cannot alter the
    # AWK program.
    # LC_ALL=C: decoded %HH escapes are bytes, not characters. Some AWK
    # implementations print a multibyte character for "%c" in a multibyte
    # locale, which would garble UTF-8 data. It also limits the ranges
    # [a-zA-Z] of the identifier check to ASCII letters.
    # $NAWK is deliberately not quoted: it is expanded as a command line.
    LC_ALL=C ${NAWK:-awk} -F'[&]' \
	-v LongFormat="${LongFormat:-no}" \
	-v VarPrefix="$VarPrefix" \
	-v EvalCheck="${EvalCheck:-true}" '
    BEGIN {
	FieldSep  = (LongFormat == "yes") ? "\n" : " "
	evalcheck = (EvalCheck == "true")

	# Value of each hexadecimal digit. Upper and lower case digits
	# are equivalent in a %HH escape (RFC 3986, section 2.1).
	Hex ["0"] = 0;  Hex ["1"] = 1;  Hex ["2"] = 2;  Hex ["3"] = 3;
	Hex ["4"] = 4;  Hex ["5"] = 5;  Hex ["6"] = 6;  Hex ["7"] = 7;
	Hex ["8"] = 8;  Hex ["9"] = 9;
	Hex ["A"] = 10; Hex ["B"] = 11; Hex ["C"] = 12;
	Hex ["D"] = 13; Hex ["E"] = 14; Hex ["F"] = 15;
	Hex ["a"] = 10; Hex ["b"] = 11; Hex ["c"] = 12;
	Hex ["d"] = 13; Hex ["e"] = 14; Hex ["f"] = 15;

	# This program text is enclosed in apostrophes by the shell, so
	# the apostrophe character has to be created at run time.
	squote = sprintf ("%c", 39)

	# An apostrophe cannot be escaped inside an apostrophe-quoted
	# shell word. Work around: close the quoted word, append the
	# apostrophe enclosed in double quotes, open a new quoted word.
	escquote = squote "\"" squote "\"" squote

	exitcode = 0
    }

    {
	# "+" encodes a blank. This has to happen before the %HH
	# escapes are decoded, because "%2B" is a literal plus sign.
	gsub (/\+/, " ")

	# Process the assignments separated by "&", e.g. "a=b&c=d&..."
	for ( field=1; field<=NF; ++field ) {

	    # Substitute %HH with the "real" character. A "%" that is
	    # not followed by two hexadecimal digits is kept unchanged.
	    if ( $field ~ /%[0-9A-Fa-f][0-9A-Fa-f]/ ) {
		newfield = ""
		fieldlength = length ($field)
		for ( i=1; i<=fieldlength; i++ ) {
		    s  = substr ($field, i, 1)
		    h1 = substr ($field, i+1, 1)
		    h2 = substr ($field, i+2, 1)
		    if ( s == "%" && (h1 in Hex) && (h2 in Hex) ) {
			dec = Hex [h1] * 16 + Hex [h2]
			newfield = sprintf ("%s%c", newfield, dec)
			i += 2
		    } else {
			newfield = newfield s
		    }
		}
		$field = newfield
	    }

	    # If "evalcheck" is true, we ensure that the result is a
	    # valid variable assignment:
	    #   1. it has the form varname=value
	    #   2. the variable name is a valid shell identifier: it
	    #      starts with a letter or underscore, followed only
	    #      by letters, underscores or digits
	    if ( evalcheck && !match ($field, /^[a-zA-Z_][a-zA-Z_0-9]*=/) ) {
		print "invalid assignment: " $field | "cat >&2"
		exit (exitcode=1)
	    }

	    # Quote the value of assignments of the form "a=b". Fields
	    # without "=" (possible only with evalcheck off) stay as
	    # they are.
	    eq = index ($field, "=")
	    if ( eq > 0 ) {
		name  = substr ($field, 1, eq-1)
		value = substr ($field, eq+1)
		vlen  = length (value)

		if ( vlen > 0 && substr (value, vlen, 1) == squote ) {
		    # The value ends with an apostrophe: close the
		    # quoted word and append the apostrophe in double
		    # quotes, without opening another (empty) word.
		    body = substr (value, 1, vlen-1)
		    tail = squote "\"" squote "\""
		} else {
		    body = value
		    tail = squote
		}
		gsub (squote, escquote, body)
		$field = name "=" squote body tail
	    }
	}

	for ( i=1; i<=NF; ++i ) {
	    printf ("%s%s", VarPrefix, $i)
	    # The separator is data, not a format string
	    if ( i<NF ) printf ("%s", FieldSep); else printf ("\n")
	}
    }

    END {
	exit (exitcode)
    }
    '
}

if [ $# -gt 0 ]
then			# Process arguments, if specified...
    # "printf" instead of "echo": some "echo" implementations treat a
    # leading "-n" as an option or interpret backslash sequences.
    # Several arguments are joined by a blank (first character of IFS).
    printf '%s\n' "$*"
else			# ...otherwise read standard input
    cat
fi |
    urlgetopt_decode