/*
 * makeConnection: resolve pHttp->pHostName and open a stream socket
 * connection to it on port pHttp->nPort, storing the socket in pHttp->s.
 *
 * NOTE: this listing is elided (the embedded line numbers skip), so the
 * return type, the declaration of pHost, the braces and every return
 * statement are not visible here. openURL treats a result of 0 as success.
 */
6 makeConnection ( HTTPInputSource *pHttp )
/* IPv4 socket address, filled in field by field below.
   NOTE(review): no zero-initialisation of `sock` is visible -- confirm. */
8 struct sockaddr_in sock;
11 /* Get internet address of the host. */
/* NOTE(review): gethostbyname() is obsolete in POSIX; getaddrinfo() is the
   replacement. The failure branch body is elided. */
12 if (!(pHost = gethostbyname ( pHttp->pHostName )))
16 /* Copy the address of the host to socket description. */
/* Copies h_length bytes as reported by the resolver.
   NOTE(review): no visible check that h_length fits in sin_addr -- confirm. */
17 memcpy (&sock.sin_addr, pHost->h_addr, pHost->h_length);
19 /* Set port and protocol */
20 sock.sin_family = AF_INET;
/* htons() converts the port to network byte order. */
21 sock.sin_port = htons( pHttp->nPort );
23 /* Make an internet socket, stream type. */
/* The -1 failure branch body is elided. */
24 if ((pHttp->s = socket (AF_INET, SOCK_STREAM, 0)) == -1)
27 /* Connect the socket to the remote host. */
/* A non-zero result from connect() enters the failure path.
   NOTE(review): whether pHttp->s is closed on this path is not visible. */
28 if (connect (pHttp->s, (struct sockaddr *) &sock, sizeof( sock )))
/* A refused connection is distinguished from other failures; what each
   case returns is elided. */
30 if (errno == ECONNREFUSED)
/*
 * parseURL: split `url` into host name, port and resource and store them in
 * pHttp->pHostName, pHttp->nPort and pHttp->pResource.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); the local
 * declarations, the assignment of j, the braces and all return statements
 * are not visible here, so the return-value contract cannot be stated.
 */
39 int parseURL( HTTPInputSource *pHttp, tmbstr url )
/* Locate a "://" scheme separator (tmbsubstr is presumably a substring
   search -- confirm against its definition). */
44 pStr = tmbsubstr( url, "://" );
46 /* If protocol is there, but not http, bail out, else assume http. */
/* Non-zero presumably means the first 7 characters are not "http://",
   compared case-insensitively; the branch body is elided. */
49 if (tmbstrncasecmp( url, "http://", 7 ))
/* Empty-bodied scan: advance i from j to the end of the host name, i.e.
   the first ':' or '/' or the end of the string. */
55 for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
59 /* Get the hostname. */
/* NOTE(review): any previous pHostName value is overwritten here; no
   release of it is visible. */
60 pHttp->pHostName = tmbstrndup (&url[j], i - j );
64 /* We have a colon delimiting the hostname. It should mean that
65 a port number is following it */
/* ++i steps past the delimiter before the digit test.
   NOTE(review): passing a negative char to isdigit() is undefined; this
   depends on the element type behind tmbstr -- confirm. */
67 if (isdigit( url[++i] )) /* A port number */
/* Consume characters up to the next '/' or the end of the string. */
69 for (; url[i] && url[i] != '/'; i++)
71 if (isdigit( url[i] ))
/* Decimal accumulation of the port number.
   NOTE(review): no upper bound is visible, so a long digit run could
   overflow nPort; its initialisation before the loop is also elided. */
72 pHttp->nPort = 10 * pHttp->nPort + (url[i] - '0');
79 else /* or just a malformed port number */
83 /* Assume default port. */
86 /* skip past the delimiting slash (we'll add it later ) */
/* Skips every consecutive '/', not just one; the loop body is elided. */
87 while (url[i] && url[i] == '/')
/* The resource is the remainder of the URL without its leading slashes;
   openURL re-adds a single '/' when it builds the request line. */
89 pHttp->pResource = tmbstrdup (url + i );
/*
 * fillBuffer: read the next chunk from the socket in->s into in->buffer and
 * record the recv() result in in->nBufSize.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); any guard
 * around the recv() call, any reset of the read position and the return
 * statement are not visible here. openURL loops while the result is > 0.
 */
94 int fillBuffer( HTTPInputSource *in )
/* recv() yields the byte count, 0 on orderly shutdown, or -1 on error;
   that value is stored unmodified as the buffer size. */
98 in->nBufSize = recv( in->s, in->buffer, sizeof( in->buffer ), 0);
/* NUL-terminate only when the buffer was not completely filled, so a full
   buffer carries no terminator.
   NOTE(review): sizeof yields size_t, so if nBufSize is a signed int a -1
   error result converts to a huge unsigned value and skips the write;
   confirm the field's type. */
100 if (in->nBufSize < sizeof( in->buffer ))
101 in->buffer[in->nBufSize] = '\0';
/*
 * openURL: initialise `in` as a Tidy input source backed by an HTTP
 * connection to pUrl, send a GET request and skip the response headers so
 * that reading starts at the body.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); local
 * declarations, braces, the release of getCmd and all return statements are
 * not visible here, so the return-value contract cannot be stated.
 */
109 int openURL( HTTPInputSource *in, tmbstr pUrl )
/* 514 == 0x0202, i.e. a request for Winsock version 2.2.
   NOTE(review): any conditional compilation around this Windows-only call
   and any check of rc are elided. */
115 rc = WSAStartup( 514, &wsaData );
/* Install the byte-level callbacks Tidy uses to read from this source. */
118 in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
119 in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
120 in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
/* NOTE(review): this stores a pointer in an integer; if `uint` is narrower
   than a pointer on the target, the value is truncated -- confirm. */
121 in->tis.sourceData = (uint) in;
/* Start with an empty receive buffer and an empty unget stack. */
122 in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;
/* NOTE(review): the result of parseURL is discarded, so a rejected URL
   still proceeds to makeConnection. */
123 parseURL( in, pUrl );
/* Only a 0 result from makeConnection proceeds to send the request. */
124 if (0 == (rc = makeConnection( in )))
126 char ch, lastCh = '\0';
/* The fixed request text below needs 37 characters plus the terminator, so
   48 extra bytes beyond the resource length is sufficient.
   NOTE(review): the MemAlloc result is used without a visible NULL check;
   confirm whether MemAlloc can return NULL. */
129 char *getCmd = MemAlloc( 48 + strlen( in->pResource ));
/* The leading '/' stripped by parseURL is re-added here. */
130 sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
/* NOTE(review): the send() result is not checked, so a short or failed
   write goes unnoticed. */
131 send( in->s, getCmd, strlen( getCmd ), 0 );
134 /* skip past the header information */
/* Refill whenever the buffer has been consumed; stop once fillBuffer
   reports no more data (result <= 0). */
135 while ( in->nextBytePos >= in->nBufSize
136 && 0 < (rc = fillBuffer( in )))
/* Scan the current buffer up to its capacity or the first NUL byte; the
   increment clause of this loop is elided. */
140 for (; in->nextBytePos < sizeof( in->buffer )
141 && 0 != in->buffer[ in->nextBytePos ];
144 ch = in->buffer[ in->nextBytePos ];
145 if (ch == '\r' || ch == '\n')
149 /* Two carriage returns or two newlines in a row,
150 that's good enough */
/* Any two consecutive line-break characters pass this test. */
153 if (lastCh == '\r' || lastCh == '\n')
163 /* end of header, scan to first non-white and return */
/* NOTE(review): the pre-increment index below has no visible bound check
   against the buffer size; the loop relies on reaching a NUL or a
   non-space byte. isspace() on a negative char is also undefined. */
164 while ('\0' != ch && isspace( ch ))
165 ch = in->buffer[ ++in->nextBytePos ];
/*
 * closeURL: close the socket of `source` and clear its back-pointer.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); any guard
 * around the close and any release of the host name or resource strings are
 * not visible here.
 */
175 void closeURL( HTTPInputSource *source )
178 closesocket( source->s );
/* Clears the value that openURL set to the source's own address. */
180 source->tis.sourceData = 0;
/*
 * HTTPGetByte: return the next byte of the source. Pushed-back bytes are
 * returned first, most recently pushed first; otherwise the next byte of
 * the receive buffer is returned, refilling the buffer when it is consumed.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); the value
 * returned when nBufSize is 0 is not visible here.
 */
187 int HTTPGetByte( HTTPInputSource *source )
/* Pop from the unget stack before touching the receive buffer. */
189 if (source->nextUnGotBytePos)
190 return source->unGetBuffer[ --source->nextUnGotBytePos ];
/* A zero nBufSize is never refilled here. */
191 if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
193 fillBuffer( source );
/* NOTE(review): only a size of exactly 0 takes this branch; a negative
   size left by a failed recv() would fall through to the read below --
   confirm. The branch body is elided. */
195 if (0 == source->nBufSize)
/* NOTE(review): if buffer holds plain (signed) char, bytes above 0x7F are
   returned as negative ints -- confirm the element type. */
197 return source->buffer[ source->nextBytePos++ ];
/*
 * HTTPUngetByte: push byteValue back so that the next HTTPGetByte call
 * returns it. At most 16 bytes can be pending; no handling for a push
 * beyond that limit is visible in this elided listing.
 */
200 void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
/* NOTE(review): 16 is presumably the capacity of unGetBuffer -- confirm
   against the struct declaration. */
202 if (source->nextUnGotBytePos < 16 ) /* Only you can prevent buffer overflows */
/* The value is narrowed to char before being stored. */
203 source->unGetBuffer[ source->nextUnGotBytePos++ ] = (char) byteValue;
/*
 * HTTPIsEOF: report whether the source is exhausted. It is not exhausted
 * while pushed-back bytes are pending or while unread bytes remain in the
 * receive buffer. A consumed buffer is refilled before deciding, so this
 * query may read from the socket.
 *
 * NOTE: this listing is elided (the embedded line numbers skip); the return
 * statements themselves are not visible here.
 */
206 Bool HTTPIsEOF( HTTPInputSource *source )
208 if (source->nextUnGotBytePos)
209 /* pending ungot bytes, not done */
/* A zero nBufSize is never refilled here. */
212 if ( 0 != source->nBufSize
213 && source->nextBytePos >= source->nBufSize)
214 /* We've consumed the existing buffer, get another */
215 fillBuffer( source );
217 if (source->nextBytePos < source->nBufSize)
218 /* we have stuff in the buffer, must not be done. */
221 /* Nothing in the buffer, and the last receive failed, must be done. */