3 # Client-side HTTP for GET, POST, and HEAD commands.
4 # These routines can be used in untrusted code that uses
5 # the Safesock security policy. These procedures use a
6 # callback interface to avoid using vwait, which is not
7 # defined in the safe base.
9 # See the file "license.terms" for information on usage and
10 # redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
12 # RCS: @(#) $Id: http.tcl,v 1.32 2000/04/22 07:07:59 sandeep Exp $
14 # Rough version history:
15 # 1.0 Old http_get interface
16 # 2.0 http:: namespace and http::geturl
17 # 2.1 Added callbacks to handle arriving data, and timeouts
18 # 2.2 Added ability to fetch into a channel
19 # 2.3 Added SSL support, and ability to post from a channel
20 # This version also cleans up error cases and eliminates the
21 # "ioerror" status in favor of raising an error
23 package provide http 2.3
31 -useragent {Tcl http client package 2.3}
32 -proxyfilter http::ProxyRequired
36 variable alphanumeric a-zA-Z0-9
39 for {} {$i <= 256} {incr i} {
41 if {![string match \[$alphanumeric\] $c]} {
42 set formMap($c) %[format %.2x $i]
45 # These are handled specially
55 namespace export geturl config reset wait formatQuery register unregister
56 # Useful, but not exported: data size status code
61 # See documentaion for details.
64 # proto URL protocol prefix, e.g. https
65 # port Default port for protocol
66 # command Command to use to create socket
68 # list of port and command that was registered.
70 proc http::register {proto port command} {
72 set urlTypes($proto) [list $port $command]
77 # Unregisters URL protocol handler
80 # proto URL protocol prefix, e.g. https
82 # list of port and command that was unregistered.
84 proc http::unregister {proto} {
86 if {![info exists urlTypes($proto)]} {
87 return -code error "unsupported url type \"$proto\""
89 set old $urlTypes($proto)
90 unset urlTypes($proto)
96 # See documentaion for details.
99 # args Options parsed by the procedure.
103 proc http::config {args} {
105 set options [lsort [array names http -*]]
106 set usage [join $options ", "]
107 if {[llength $args] == 0} {
109 foreach name $options {
110 lappend result $name $http($name)
114 regsub -all -- - $options {} options
115 set pat ^-([join $options |])$
116 if {[llength $args] == 1} {
117 set flag [lindex $args 0]
118 if {[regexp -- $pat $flag]} {
121 return -code error "Unknown option $flag, must be: $usage"
124 foreach {flag value} $args {
125 if {[regexp -- $pat $flag]} {
126 set http($flag) $value
128 return -code error "Unknown option $flag, must be: $usage"
136 # Clean up the socket and eval close time callbacks
139 # token Connection token.
140 # errormsg (optional) If set, forces status to error.
141 # skipCB (optional) If set, don't call the -command callback. This
142 # is useful when geturl wants to throw an exception instead
143 # of calling the callback. That way, the same error isn't
144 # reported to two places.
149 proc http::Finish { token {errormsg ""} {skipCB 0}} {
152 global errorInfo errorCode
153 if {[string length $errormsg] != 0} {
154 set state(error) [list $errormsg $errorInfo $errorCode]
155 set state(status) error
157 catch {close $state(sock)}
158 catch {after cancel $state(after)}
159 if {[info exists state(-command)] && !$skipCB} {
160 if {[catch {eval $state(-command) {$token}} err]} {
161 if {[string length $errormsg] == 0} {
162 set state(error) [list $err $errorInfo $errorCode]
163 set state(status) error
166 if {[info exist state(-command)]} {
167 # Command callback may already have unset our state
168 unset state(-command)
175 # See documentaion for details.
178 # token Connection token.
184 proc http::reset { token {why reset} } {
187 set state(status) $why
188 catch {fileevent $state(sock) readable {}}
189 catch {fileevent $state(sock) writable {}}
191 if {[info exists state(error)]} {
192 set errorlist $state(error)
194 eval error $errorlist
200 # Establishes a connection to a remote url via http.
203 # url The http URL to goget.
204 # args Option value pairs. Valid options include:
205 # -blocksize, -validate, -headers, -timeout
207 # Returns a token for this connection.
208 # This token is the name of an array that the caller should
209 # unset to garbage collect the state.
211 proc http::geturl { url args } {
215 # Initialize the state variable, an array. We'll return the
216 # name of this array as the token for the transaction.
218 if {![info exists http(uid)]} {
221 set token [namespace current]::[incr http(uid)]
226 # Process command options.
234 -type application/x-www-form-urlencoded
247 set options {-blocksize -channel -command -handler -headers \
248 -progress -query -queryblocksize -querychannel -queryprogress\
249 -validate -timeout -type}
250 set usage [join $options ", "]
251 regsub -all -- - $options {} options
252 set pat ^-([join $options |])$
253 foreach {flag value} $args {
254 if {[regexp $pat $flag]} {
256 if {[info exists state($flag)] && \
257 [string is integer -strict $state($flag)] && \
258 ![string is integer -strict $value]} {
260 return -code error "Bad value for $flag ($value), must be integer"
262 set state($flag) $value
265 return -code error "Unknown option $flag, can be: $usage"
269 # Make sure -query and -querychannel aren't both specified
271 set isQueryChannel [info exists state(-querychannel)]
272 set isQuery [info exists state(-query)]
273 if {$isQuery && $isQueryChannel} {
275 return -code error "Can't combine -query and -querychannel options!"
278 # Validate URL, determine the server host and port, and check proxy case
280 if {![regexp -nocase {^(([^:]*)://)?([^/:]+)(:([0-9]+))?(/.*)?$} $url \
281 x prefix proto host y port srvurl]} {
283 error "Unsupported URL: $url"
285 if {[string length $proto] == 0} {
287 set url ${proto}://$url
289 if {![info exists urlTypes($proto)]} {
291 return -code error "unsupported url type \"$proto\""
293 set defport [lindex $urlTypes($proto) 0]
294 set defcmd [lindex $urlTypes($proto) 1]
296 if {[string length $port] == 0} {
299 if {[string length $srvurl] == 0} {
302 if {[string length $proto] == 0} {
306 if {![catch {$http(-proxyfilter) $host} proxy]} {
307 set phost [lindex $proxy 0]
308 set pport [lindex $proxy 1]
311 # If a timeout is specified we set up the after event
312 # and arrange for an asynchronous socket connection.
314 if {$state(-timeout) > 0} {
315 set state(after) [after $state(-timeout) \
316 [list http::reset $token timeout]]
322 # If we are using the proxy, we must pass in the full URL that
323 # includes the server name.
325 if {[info exists phost] && [string length $phost]} {
327 set conStat [catch {eval $defcmd $async {$phost $pport}} s]
329 set conStat [catch {eval $defcmd $async {$host $port}} s]
333 # something went wrong while trying to establish the connection
334 # Clean up after events and such, but DON'T call the command callback
335 # (if available) because we're going to throw an exception from here
339 return -code error $s
343 # Wait for the connection to complete
345 if {$state(-timeout) > 0} {
346 fileevent $s writable [list http::Connect $token]
348 if {$state(status) != "connect"} {
350 # Likely to be connection timeout. If there was a connection
351 # error, (e.g., bad port), then http::wait will have
352 # raised an error already
359 # Send data in cr-lf format, but accept any line terminators
361 fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize)
363 # The following is disallowed in safe interpreters, but the socket
364 # is already in non-blocking mode in that case.
366 catch {fconfigure $s -blocking off}
369 set state(querylength) [string length $state(-query)]
370 if {$state(querylength) > 0} {
374 # there's no query data
378 } elseif {$state(-validate)} {
380 } elseif {$isQueryChannel} {
382 # The query channel must be blocking for the async Write to
384 fconfigure $state(-querychannel) -blocking 1 -translation binary
389 puts $s "$how $srvurl HTTP/1.0"
390 puts $s "Accept: $http(-accept)"
391 puts $s "Host: $host"
392 puts $s "User-Agent: $http(-useragent)"
393 foreach {key value} $state(-headers) {
394 regsub -all \[\n\r\] $value {} value
395 set key [string trim $key]
396 if {[string equal $key "Content-Length"]} {
398 set state(querylength) $value
400 if {[string length $key]} {
401 puts $s "$key: $value"
404 if {$isQueryChannel && $state(querylength) == 0} {
405 # Try to determine size of data in channel
406 # If we cannot seek, the surrounding catch will trap us
408 set start [tell $state(-querychannel)]
409 seek $state(-querychannel) 0 end
410 set state(querylength) \
411 [expr {[tell $state(-querychannel)] - $start}]
412 seek $state(-querychannel) $start
415 # Flush the request header and set up the fileevent that will
416 # either push the POST data or read the response.
420 # It is possible to have both the read and write fileevents active
421 # at this point. The only scenario it seems to affect is a server
422 # that closes the connection without reading the POST data.
423 # (e.g., early versions TclHttpd in various error cases).
424 # Depending on the platform, the client may or may not be able to
425 # get the response from the server because of the error it will
426 # get trying to write the post data. Having both fileevents active
427 # changes the timing and the behavior, but no two platforms
428 # (among Solaris, Linux, and NT) behave the same, and none
429 # behave all that well in any case. Servers should always read thier
430 # POST data if they expect the client to read their response.
432 if {$isQuery || $isQueryChannel} {
433 puts $s "Content-Type: $state(-type)"
435 puts $s "Content-Length: $state(querylength)"
438 fconfigure $s -translation {auto binary}
439 fileevent $s writable [list http::Write $token]
443 fileevent $s readable [list http::Event $token]
446 if {! [info exists state(-command)]} {
448 # geturl does EVERYTHING asynchronously, so if the user
449 # calls it synchronously, we just do a wait here.
454 # The socket probably was never connected,
455 # or the connection dropped later.
457 # Clean up after events and such, but DON'T call the command callback
458 # (if available) because we're going to throw an exception from here
463 return -code error $err
469 # Data access functions:
470 # Data - the URL data
471 # Status - the transaction status: ok, reset, eof, timeout
472 # Code - the HTTP transaction code, e.g., 200
473 # Size - the size of the URL data
475 proc http::data {token} {
480 proc http::status {token} {
483 return $state(status)
485 proc http::code {token} {
490 proc http::ncode {token} {
493 if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
499 proc http::size {token} {
502 return $state(currentsize)
507 # Garbage collect the state associated with a transaction
510 # token The token returned from http::geturl
513 # unsets the state array
515 proc http::cleanup {token} {
518 if {[info exist state]} {
525 # This callback is made when an asyncronous connection completes.
528 # token The token returned from http::geturl
531 # Sets the status of the connection, which unblocks
532 # the waiting geturl call
534 proc http::Connect {token} {
537 global errorInfo errorCode
538 if {[eof $state(sock)] ||
539 [string length [fconfigure $state(sock) -error]]} {
540 set state(status) error
541 set state(error) [list \
542 "connect failed [fconfigure $state(sock) -error]" \
543 $errorInfo $errorCode]
545 set state(status) connect
547 fileevent $state(sock) writable {}
552 # Write POST query data to the socket
555 # token The token for the connection
558 # Write the socket and handle callbacks.
560 proc http::Write {token} {
565 # Output a block. Tcl will buffer this if the socket blocks
570 # Catch I/O errors on dead sockets
572 if {[info exists state(-query)]} {
574 # Chop up large query strings so queryprogress callback
575 # can give smooth feedback
578 [string range $state(-query) $state(queryoffset) \
579 [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
580 incr state(queryoffset) $state(-queryblocksize)
581 if {$state(queryoffset) >= $state(querylength)} {
582 set state(queryoffset) $state(querylength)
587 # Copy blocks from the query channel
589 set outStr [read $state(-querychannel) $state(-queryblocksize)]
590 puts -nonewline $s $outStr
591 incr state(queryoffset) [string length $outStr]
592 if {[eof $state(-querychannel)]} {
597 # Do not call Finish here, but instead let the read half of
598 # the socket process whatever server reply there is to get.
600 set state(posterror) $err
605 fileevent $s writable {}
606 fileevent $s readable [list http::Event $token]
609 # Callback to the client after we've completely handled everything
611 if {[string length $state(-queryprogress)]} {
612 eval $state(-queryprogress) [list $token $state(querylength)\
619 # Handle input on the socket
622 # token The token returned from http::geturl
625 # Read the socket and handle callbacks.
627 proc http::Event {token} {
636 if {[string equal $state(state) "header"]} {
637 if {[catch {gets $s line} n]} {
640 set state(state) body
641 if {![regexp -nocase ^text $state(type)]} {
642 # Turn off conversions for non-text data
643 fconfigure $s -translation binary
644 if {[info exists state(-channel)]} {
645 fconfigure $state(-channel) -translation binary
648 if {[info exists state(-channel)] &&
649 ![info exists state(-handler)]} {
650 # Initiate a sequence of background fcopies
651 fileevent $s readable {}
655 if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
656 set state(type) [string trim $type]
658 if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
659 set state(totalsize) [string trim $length]
661 if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
662 lappend state(meta) $key [string trim $value]
663 } elseif {[regexp ^HTTP $line]} {
664 set state(http) $line
669 if {[info exists state(-handler)]} {
670 set n [eval $state(-handler) {$s $token}]
672 set block [read $s $state(-blocksize)]
673 set n [string length $block]
675 append state(body) $block
679 incr state(currentsize) $n
684 if {[info exists state(-progress)]} {
685 eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
693 # Error handling wrapper around fcopy
696 # s The socket to copy from
697 # token The token returned from http::geturl
700 # This closes the connection upon error
702 proc http::CopyStart {s token} {
706 fcopy $s $state(-channel) -size $state(-blocksize) -command \
707 [list http::CopyDone $token]
715 # fcopy completion callback
718 # token The token returned from http::geturl
719 # count The amount transfered
724 proc http::CopyDone {token count {error {}}} {
728 incr state(currentsize) $count
729 if {[info exists state(-progress)]} {
730 eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
732 # At this point the token may have been reset
733 if {[string length $error]} {
735 } elseif {[catch {eof $s} iseof] || $iseof} {
744 # Handle eof on the socket
747 # token The token returned from http::geturl
750 # Clean up the socket
752 proc http::Eof {token} {
755 if {[string equal $state(state) "header"]} {
757 set state(status) eof
767 # See documentaion for details.
770 # token Connection token.
773 # The status after the wait.
775 proc http::wait {token} {
779 if {![info exists state(status)] || [string length $state(status)] == 0} {
780 # We must wait on the original variable name, not the upvar alias
781 vwait $token\(status)
783 if {[info exists state(error)]} {
784 set errorlist $state(error)
786 eval error $errorlist
788 return $state(status)
791 # http::formatQuery --
793 # See documentaion for details.
794 # Call http::formatQuery with an even number of arguments, where
795 # the first is a name, the second is a value, the third is another
799 # args A list of name-value pairs.
804 proc http::formatQuery {args} {
808 append result $sep [mapReply $i]
809 if {[string compare $sep "="]} {
820 # Do x-www-urlencoded character mapping
823 # string The string the needs to be encoded
828 proc http::mapReply {string} {
831 # The spec says: "non-alphanumeric characters are replaced by '%HH'"
832 # 1 leave alphanumerics characters alone
833 # 2 Convert every other character to an array lookup
834 # 3 Escape constructs that are "special" to the tcl parser
835 # 4 "subst" the result, doing all the array substitutions
837 set alphanumeric a-zA-Z0-9
838 regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
839 regsub -all \n $string {\\n} string
840 regsub -all \t $string {\\t} string
841 regsub -all {[][{})\\]\)} $string {\\&} string
842 return [subst $string]
845 # http::ProxyRequired --
846 # Default proxy filter.
849 # host The destination host
852 # The current proxy settings
854 proc http::ProxyRequired {host} {
856 if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
857 if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} {
858 set http(-proxyport) 8080
860 return [list $http(-proxyhost) $http(-proxyport)]