/***************************************************************************\
 *  webgrab - v1.3 - Copyright 1995, Brian J. Swetland                     *
 *                                                                         *
 *  - initial version by Brian Swetland                                    *
 *  - cleaned up a bit by Brandon Long                                     *
 *  - proxy support by Kristin Buxton                                      *
 *  - cleaned up more by Brian Swetland                                    *
 *  - improved / mucked around with by Matt Hebley                         *
 *  - tidied up and improved by Volker Kuhlmann, version 2.1               *
 *    v.kuhlmann@elec.canterbury.ac.nz                                     *
 *  - changed crypt code, Volker Kuhlmann, version 2.2                     *
 *  - added option -f, Volker Kuhlmann, version 2.3                        *
 *  - fixed usage, Volker Kuhlmann, version 2.32                           *
 *                                                                         *
 *  Free for any personal or non-commercial use.                           *
 *  Use at your own risk.  If you like it, buy the authors a pizza.        *
\***************************************************************************/

/* Version string; shown in the usage text and sent in the User-Agent header */
#define VERSION "2.32 (crypt64 1.12)"

/* Program exit values */
/* successful completion (also used after -E / -D finished without error) */
#define EXIT_OK 0
/* local problem: a file could not be opened/read, or no socket available */
#define EXIT_LOCALIOERR 2
/* remote problem: host/proxy lookup failed or the connection was refused */
#define EXIT_REMOTEIOERR 3
/* bad command line (see param_err) or a URL that could not be parsed */
#define EXIT_ARGSYNTAX 4
/* decrypt() found an illegal character in its input */
#define EXIT_VALUE 5
/* the usage text was printed */
#define EXIT_USAGE 6
/* NOTE(review): not referenced anywhere in this file */
#define EXIT_INTERNAL 7

/* default port number, used if not specified */
#define DEFAULTPORT 80


#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>

#ifdef __bsdi__
# include <sys/malloc.h>
#else
# ifndef NeXT
#  include <malloc.h>
# endif
#endif

#include <sys/time.h>
#include <sys/types.h>

#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <netdb.h>

#include <string.h>
#include <ctype.h>



/* ENCRYPTION SUPPORT */

/* include the encryption stuff - mean but hey */
#include "crypt64.c"

/* Size of the authorisation buffers.  main() reads at most
   USERPASSSIZE-1 characters of credentials; input in the clear-text
   user:password form is additionally cut to USERPASSSIZE/3*2-1 characters
   before it is encoded into authstr. */
#define USERPASSSIZE 128

/* encrypted authorisation string; filled in by main() when -a is given
   and sent after "Proxy-authorization: Basic " */
char authstr[USERPASSSIZE];



/* strdup isn't portable, so we make our own.
   Returns a freshly malloc'ed copy of s which the caller must free(),
   or NULL if s is NULL or no memory is available.  */
char *strd(char *s) {
    size_t size;
    char *d;

    if (s == NULL)
        return NULL;

    size = strlen(s) + 1;       /* include the terminating NUL */
    d = malloc(size);
    if (d != NULL)
        memcpy(d, s, size);
    return d;
}



/* Returns a malloc'ed, NUL-terminated copy of the len characters starting
   at start, or NULL if no memory is available. */
static char *url_span_dup(const char *start, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

/* True for the characters that end a URL: NUL, '"', '>' and whitespace.
   The cast keeps isspace() defined for chars with the high bit set. */
static int url_at_end(char c)
{
    return c == '\0' || c == '"' || c == '>' || isspace((unsigned char) c);
}

/* parses URL looking like blah://host[:port][/path]
   will ignore anything before the first : and terminate path when it
   hits >, ", or whitespace.

   Returns the port number (DEFAULTPORT if none is given), or 0 if the
   URL is bad: no "://", a port that is 0 or larger than 65535, junk after
   the port, or no memory.  The url string itself is not modified.

   On success *host, *path and *file point to malloc'ed strings which are
   not freed here.  *path is "/" if the URL has no path.  *file is the
   text after the last '/' of the path (a '/' that is the very last
   character of the url string does not count as a separator); if that
   text is empty or there is no path, *file is the SAME pointer as *host,
   so free it only once.  On failure the three outputs are left untouched.
*/
int parseURL(char *url, char **host, char **path, char **file)
{
    const char *p, *start, *last_slash;
    char *h, *pa, *f;
    long port;

    p = url;

    /* skip anything up to the first : (the one after http, etc) */
    while (*p && *p != ':')
        p++;
    if (!*p)
        return 0;

    /* REQUIRE two '/'s; p[2] is only looked at when p[1] is not NUL */
    if (p[1] != '/' || p[2] != '/')
        return 0;
    p += 3;

    /* hostname is terminated by ':', '/', or an end-of-URL character */
    start = p;
    while (*p != ':' && *p != '/' && !url_at_end(*p))
        p++;

    h = url_span_dup(start, (size_t) (p - start));
    if (h == NULL)
        return 0;

    /* optionally read a portnumber */
    if (*p == ':') {
        p++;
        port = 0;
        while (isdigit((unsigned char) *p)) {
            port = port * 10 + (*p - '0');
            if (port > 65535) {         /* also stops the sum overflowing */
                free(h);
                return 0;
            }
            p++;
        }
        /* the port must be non-zero and followed by the path or the end */
        if (port == 0 || (*p != '/' && !url_at_end(*p))) {
            free(h);
            return 0;
        }
    } else {
        port = DEFAULTPORT;
    }

    /* still more; get path, and file name */
    if (*p == '/') {
        start = last_slash = p;
        while (!url_at_end(*p)) {
            if (*p == '/' && p[1] != '\0')
                last_slash = p;
            p++;
        }
        pa = url_span_dup(start, (size_t) (p - start));
        if (p - last_slash > 1)
            f = url_span_dup(last_slash + 1, (size_t) (p - last_slash - 1));
        else
            f = h;
    } else {
        pa = url_span_dup("/", 1);
        f = h;
    }

    if (pa == NULL || f == NULL) {
        if (f != h)
            free(f);
        free(pa);
        free(h);
        return 0;
    }

    *host = h;
    *path = pa;
    *file = f;
    return (int) port;
}



/* Handler for option -E (usage text: "Encrypt from stdin to stdout").
   Hands over to encrypt64_stdio() and then ends the program with EXIT_OK;
   it never returns to the caller. */
void encrypt (void)
{
    (void) encrypt64_stdio ();

    exit (EXIT_OK);
}



/* Handler for option -D (usage text: "Decrypt from stdin to stdout").
   Runs decrypt64_stdio(); a zero result ends the program with EXIT_OK,
   anything else is reported on stderr and ends it with EXIT_VALUE.
   Never returns to the caller. */
void decrypt (void)
{
    if (!decrypt64_stdio ())
	exit (EXIT_OK);

    fputs ("decrypt64 error: encountered illegal char in input.\n", stderr);
    exit (EXIT_VALUE);
}



/* Reports a command line error on stderr and terminates the program with
   EXIT_ARGSYNTAX.  Never returns. */
void param_err (void)
{
    static const char complaint[] =
	"Error with parameters.\n"
	"Required argument to option missing, double argument,"
		" or unknown option.\n"
	"Call without parameters for usage.\n";

    fputs (complaint, stderr);
    exit (EXIT_ARGSYNTAX);
}



/* Prints the help screen on stdout and terminates the program with
   EXIT_USAGE.  argv is the program name to show in the "Usage:" line.
   Never returns. */
void usage(char *argv)
{
    /* everything below the "Usage:" line; contains no conversions */
    static const char option_help[] =
	"   -g      Do a GET (default), writes to stdout\n"
	"   -h      Do a GET, Headers Only, writes to stdout\n"
	"   -s      Suppress headers of grabbed page\n"
	"   -p      Do a POST\n"
	"   -r      Read HTTP headers from stdin (sent for both GET and POST)\n"
	"   -f      Save to file, don't write stdout. Filename is taken "
			"from URL.\n"
	"   -P      Next argument is <proxy>\n"
	"   <proxy> HTTP Proxy Host (hostname[:port] format)\n"
	"   -a      Authentication: Next argument is <file>\n"
	"   <file>  File containing: user:password (un-encrypted), or"
			" string (encrypted)\n"
	"              use '-' for stdin\n"
	"   <url>   URL to retrieve (in http:// format)\n"
	"   -E      Encrypt from stdin to stdout\n"
	"   -D      Decrypt from stdin to stdout\n"
	"\n"
	"Note: though any input will be encrypted, authentication requires\n"
	"      the form: user:password\n"
	"\n";

    /* banner and synopsis are the only lines with variable parts */
    printf("\nWebgrabber: The Command Line Browser\tVersion %s\n"
	   "Usage: %s [-ghsprfED] [-P <proxy>] [-a <file>] <url>\n",
	   VERSION, argv);
    fputs(option_help, stdout);

    exit(EXIT_USAGE);
}



int main(int argc, char *argv[])
{
    int s, i, port, pport;
    struct sockaddr_in sa;
    /* struct sockaddr sa; */
    struct hostent *hp;
    FILE *fpo,*fpi, *fauth, *fpo2;
    char buf[1024];
    char *path,*host,*file,*p;

      /* operational flags */
    int ignore=0,head=0,readin=0,get=1,fileout=0,
	proxy=0,proxynext=0,auth=0,authnext=0,
	url=0;

    if (argc == 1) usage (argv[0]);

    for(i = 1; i < argc; i++){
	if(proxynext){ /* this arg is our proxy */
	    proxy = i; proxynext = 0;
	    continue;
	}
	if (authnext) { /* this arg is our authorisation */
	    auth = i; authnext = 0;
	    continue;
	}
	if(argv[i][0]=='-'){
	    for(path=&argv[i][1];*path;path++){
		switch(*path){
		case 'r':
		    readin = 1;
		    break;
		case 's':
		    ignore = 1;
		    break;
		case 'g':
		    get = 1;
		    break;
		case 'p':
		    get = 0;
		    break;
		case 'h':
		    head = 1;
		    break;
		case 'f':
		    fileout = 1;
		    break;
		case 'P':
		    if(proxy) param_err();
		    proxy = proxynext = 1;
		    break;
		case 'a':
		    if (auth) param_err();
		    auth = authnext = 1;
		    break;
		case 'E':
		    encrypt ();
		    break;
		case 'D':
		    decrypt ();
		    break;
		default:
		    fprintf (stderr, "\nUnrecognised parameter: %c\n", *path);
		    usage(argv[0]);
		}
	    }
	    continue;
	}
	    /* must be a url */
	if(url) param_err();
	url = i;
    }

    if(proxynext || !url || authnext) param_err();

    if(!(port=parseURL(argv[url], &host, &path, &file))){
	fprintf(stderr,"error: invalid url\n");
	exit(EXIT_ARGSYNTAX);
    }
/*	printf("file name: %s\n", file); exit (-5);*/

	/* get and encrypt authorisation stuff if necessary */
    if (auth) {
	if (strcmp (argv[auth], "-") == 0)
	    fgets (buf, USERPASSSIZE, stdin);
	else {
	    fauth = fopen (argv[auth], "r");
	    if (fauth == NULL) {
		fprintf (stderr, "error: opening file: %s\n", argv[auth]);
		exit (EXIT_LOCALIOERR);
	    }
	    if (fgets (buf, USERPASSSIZE, fauth) == NULL) {
		fprintf (stderr, "error: reading file: %s\n", argv[auth]);
		exit (EXIT_LOCALIOERR);
	    }
	    fclose (fauth);
	}
	if (strchr (buf, ':') != NULL) {
	    /* ensure no buffer overflow */
	    buf[USERPASSSIZE / 3 * 2 - 1] = '\0';
	    encrypt64_mem (buf, authstr);
	} else
	    strncpy (authstr, buf, USERPASSSIZE-1);
	authstr[USERPASSSIZE-1] = '\0';
    }

	/* find the server */
    if(proxy){
	pport = 8080;
	p = argv[proxy];

	    /* look for a portnum */
	while(*p){
	    if(*p==':'){
		*p=0;
		p++;
		pport = atoi(p);
		break;
	    }
	    p++;
	}
	if(!(hp = gethostbyname(argv[proxy]))) {
	    fprintf(stderr,"error: can't get proxy %s.\n",argv[proxy]);
	    exit(EXIT_REMOTEIOERR);
	}
    } else {
	if(!(hp = gethostbyname(host))) {
	    fprintf(stderr,"error: can't get host %s.\n",host);
	    exit(EXIT_REMOTEIOERR);
	}
    }

	/* Setup the socket */
    memset(&sa, 0, sizeof(sa));
    sa.sin_port = htons(proxy ? pport : port);
    memcpy((char *)&sa.sin_addr, (char *)hp->h_addr, hp->h_length);
    sa.sin_family = hp->h_addrtype;
    

	/* allocate the socket */
    if((s = socket(hp->h_addrtype, SOCK_STREAM, 0)) < 0){
	fprintf(stderr,"error: can't get socket\n");
	exit(EXIT_LOCALIOERR);
    }

/*   sa.sa_family=hp->h_addrtype;
	memcpy((char *)&sa.sa_data, (char *)hp->h_addr, hp->h_length);*/
	
	/* connect to the server */
    if(connect(s, (struct sockaddr *) &sa, sizeof(sa)) < 0){
	close(s);
	fprintf(stderr,"error: can't connect\n");
	exit(EXIT_REMOTEIOERR);
    }

    fpo = fdopen(s,"w");
    fpi = fdopen(s,"r");
    if(proxy){
	fprintf(fpo,"%s http://%s:%d%s HTTP/1.1\r\n",
		head?"HEAD":get?"GET":"POST",host,port,path);
    } else {
	fprintf(fpo,"%s %s HTTP/1.1\r\n",head?"HEAD":get?"GET":"POST",path);
    }

	/* send headers */
    if (readin) {
	    /* copy headers from stdin ... */
	while(!feof(stdin)){
	    i = fread(buf,1,1024,stdin);
	    if(i) fwrite(buf,1,i,fpo);
	    if(feof(stdin)) break;
	}
    } else {
	    /* send our normal header info */
	fprintf(fpo,
		"User-Agent: WebGrab/%s (commandline forever)\r\n",
		VERSION);
    }

	/* send authorisation if given */
    if (auth) {
	fprintf (fpo, "Proxy-authorization: Basic %s\r\n", authstr);
    }
	/* send empty line: end of headers */
    fputs("\r\n",fpo);
    fflush(fpo);

	/* set output: stdout or file */
    if (fileout) {
	fpo2 = fopen (file, "w");
	if (fpo2 == NULL) {
	    fprintf (stderr, "error: opening output file %s\n", file);
	    exit (EXIT_LOCALIOERR);
	}
    } else
	fpo2 = stdout;

	/* handle headers */
    while(!feof(fpi)){
	fgets(buf,1024,fpi);
	if(!ignore) fprintf(fpo2,"%s",buf);
	if(feof(fpi) || buf[0]<' ') break;
    }
    while(!feof(fpi)){
	i = fread(buf,1,1024,fpi);
	if(i) fwrite(buf,1,i,fpo2);
	if(feof(fpi)) break;
    }
    close(s);
    if (fileout)
	fclose (fpo2);
    exit(EXIT_OK);
}

