#! /bin/sh
## The web sucks. It is a mighty dismal kludge built out of a thousand
## tiny dismal kludges all band-aided together, and now these bottom-line
## clueless pinheads who never heard of "TCP handshake" want to run
## *commerce* over the damn thing. Ye godz. Welcome to TV of the next
## century -- six million channels of worthless shit to choose from, and
## about as much security as today's cable industry!
##
## Having grown mightily tired of pain in the ass browsers, I decided
## to build the minimalist client. It doesn't handle POST, just GETs, but
## the majority of cgi forms handlers apparently ignore the method anyway.
## A distinct advantage is that it *doesn't* pass on any other information
## to the server, like Referer: or info about your local machine such as
## Netscum tries to!
##
## Since the first version, this has become the *almost*-minimalist client,
## but it saves a lot of typing now. And with netcat as its backend, it's
## totally the balls. Don't have netcat? Get it here in /src/hacks!
## _H* 950824, updated 951009 et seq.
##
## args: hostname [port]. You feed it the filename-parts of URLs.
## In the loop, HOST, PORT, and SAVE do the right things; a null line
## gets the previous spec again [useful for initial timeouts]; EOF to exit.
## Relative URLs behave like a "cd" to wherever the last slash appears, or
## just use the last component with the saved preceding "directory" part.
## "\" clears the "filename" part and asks for just the "directory", and
## ".." goes up one "directory" level while retaining the "filename" part.
## Play around; you'll get used to it.

# A hostname is mandatory; complain on stderr and bail out without one.
if test "$1" = "" ; then
  echo "Needs hostname arg." >&2
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# PAGER and BACKEND are "command plus options" strings and are expanded
# unquoted on purpose further down so the options get split off.
: "${PAGER:=more}"
BACKEND="nc -v -w 15"
host="$1"
port="${2:-80}"			# second arg overrides the default port

# state: "current dir" always ends in a slash, "current file" may be null
spec="/"
specD="/"
specF=''
saving=''

# Temp file handling. The page lives inside a private directory that
# mktemp creates atomically, so nobody can symlink-race a guessable name
# in /tmp. The page file itself does not exist until the first fetch,
# which is what SAVE uses to tell whether there is anything to save yet.
SCRATCH=$(mktemp -d "${TMPDIR:-/tmp}/web.XXXXXX") || {
  echo "Can't create a temp directory" >&2
  exit 1
}
TMPAGE="${SCRATCH}/page"

# Remove the scratch directory on every way out. Not every sh runs the
# EXIT trap when killed by a signal, so turn the usual ones into exits.
cleanup () {
  rm -rf -- "${SCRATCH}"
}
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# save_page URLPATH
# Append the current page to the save file, under a banner naming the
# host and path, in a format that still shows the URLs we hit after a
# de-html run. If the save file can't be opened nothing is written and
# the shell reports the redirection error itself.
save_page () {
  {
    printf '%s\n' "=== ${host}:${1} ==="
    cat "${TMPAGE}"
    echo ''
  } >> "${saving}"
}

# get loopy. Prompts go out via printf, which unlike "echo -n" behaves the
# same everywhere; input is read with -r so backslashes arrive untouched.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
    HOST)
      printf 'New host: '
      # a blank answer (or EOF) keeps the current host
      read -r answer && test "${answer}" && host="${answer}"
      continue
      ;;
    PORT)
      printf 'New port: '
      # a blank answer (or EOF) keeps the current port
      read -r answer && test "${answer}" && port="${answer}"
      continue
      ;;
    SAVE)
      printf 'Save file: '
      # a blank answer turns saving back off
      read -r saving
      # if we've already got a page, save it
      test "${saving}" && test -f "${TMPAGE}" &&
        save_page "${specD}${specF}"
      continue
      ;;
# Keep a state-concept of "current dir" and "current file".
# Dir is /foo/bar/ ; file is "baz" or null.
# leading slash: create whole new state.
    /*)
      specF="${spec##*/}"		# everything after the last slash
      specD="${spec%/*}/"		# everything up to and including it
      ;;
# embedded slash: adding to the path. "file" part can be blank, too
    */*)
      specF="${spec##*/}"
      joined="${specD}${spec}"
      specD="${joined%/*}/"
      ;;
# dotdot: jump "up" one level and just reprompt [confirms what it did...]
    ..)
      # drops the last "component/" from the dir; "/" stays "/"
      specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
      continue
      ;;
# blank line: do nothing, which will re-get the current one
    '')
      ;;
# hack-quoted blank line: "\" means just zero out "file" part. A doubled
# backslash is accepted too, for fingers used to escaping it.
    '\'|'\\')
      specF=''
      ;;
# sigh
    '?')
      echo "Help yourself. Read the script fer krissake."
      continue
      ;;
# anything else is taken as a "file" part
    *)
      specF="${spec}"
      ;;
  esac

# now put it together and stuff it down a connection. Some lame non-unix
# http servers assume they'll never get simple-query format, and wait till
# an extra newline arrives. If you're up against one of these, change
# below to printf 'GET %s\n\n' "${spec}" | ${BACKEND} ...
  spec="${specD}${specF}"
  # shellcheck disable=SC2086 # BACKEND is deliberately word-split
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  # shellcheck disable=SC2086 # PAGER may carry options, e.g. "less -R"
  ${PAGER} "${TMPAGE}"

  if test "${saving}" ; then
    save_page "${spec}"
  fi
done
# EOF on the prompt: the EXIT trap takes care of the scratch directory
exit 0

#######
# Encoding notes, finally from RFC 1738:
# %XX -- hex-encode of special chars
# allowed alphas in a URL: $_-.+!*'(),
# relative names *not* described, but obviously used all over the place
# transport://user:pass@host:port/path/name?query-string
# wais: port 210, //host:port/database?search or /database/type/file?
# cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz
# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
#######
# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
# multi-word args for most cases: foo+bar
# See 'websearch' for concise results of this research...