## Run a parameterised SQL statement inside a single transaction.
##
## conn:      open database connection.
## sql:       SQL statement passed to dbGetPreparedQuery().
## bind.data: data passed to dbGetPreparedQuery() as the values to bind.
##
## Returns the value of dbCommit(conn). If the statement fails the
## transaction is rolled back and the original error is re-signalled.
.dbAddData <- function(conn, sql, bind.data)
{
    dbBeginTransaction(conn)
    tryCatch(
        dbGetPreparedQuery(conn, sql, bind.data),
        error=function(e) {
            ## Do not leave the transaction open on failure. The rollback
            ## is wrapped in try() so that a problem during rollback never
            ## hides the error that caused it.
            try(dbRollback(conn), silent=TRUE)
            stop(e)
        })
    dbCommit(conn)
}

## Read a collated SIFT prediction file and return the columns that are
## stored in the database.
##
## file: tab-delimited file without a header line, 31 columns per record.
##
## Returns a data.frame with 23 columns: an integer 'rsid' (the "rs"
## prefix removed), 'protein_id', 'aa_change', and the prediction, score,
## median, positionseqs and totalseqs columns for each of snp/ref x BH/AH.
## Missing predictions are replaced by "NOT SCORED".
.loadSIFT <- function(file)
{
    ## no header in file
    ## column information is contained in
    ## "/srcdata/SIFT.Hsapiens.dbSNP132/downloads/readme_collated_predictions_dbSNP132.txt"
    ## Keep strings as character: assigning a new value into a factor
    ## column would produce NA instead of "NOT SCORED".
    dat <- read.delim(file, header=FALSE, stringsAsFactors=FALSE)
    COLNAMES <- c("rsid", "protein_id", "aa_change", 
        "snp_aa_BH", "snp_method_BH", "snp_prediction_BH", "snp_score_BH", 
        "snp_median_BH", "snp_positionseqs_BH", "snp_totalseqs_BH", 
        "ref_aa_BH", "ref_method_BH", "ref_prediction_BH", "ref_score_BH", 
        "ref_median_BH", "ref_positionseqs_BH", "ref_totalseqs_BH", 
        "snp_aa_AH", "snp_method_AH", "snp_prediction_AH", "snp_score_AH", 
        "snp_median_AH", "snp_positionseqs_AH", "snp_totalseqs_AH", 
        "ref_aa_AH", "ref_method_AH", "ref_prediction_AH",
        "ref_score_AH", "ref_median_AH", "ref_positionseqs_AH",
        "ref_totalseqs_AH")
    if (ncol(dat) != length(COLNAMES))
        stop("expected ", length(COLNAMES), " columns in SIFT file, found ",
             ncol(dat))
    colnames(dat) <- COLNAMES 

    ## remove 'rs' from rsid
    rsid <- as.integer(gsub("rs", "", dat$rsid, fixed=TRUE))

    ## Replace missing values in 'prediction' with 'not scored'.
    ## A column that is empty in every record is read as logical NA, so
    ## NA is treated as missing as well as the empty string.
    for (col in c("ref_prediction_BH", "snp_prediction_BH",
                  "ref_prediction_AH", "snp_prediction_AH")) {
        prediction <- as.character(dat[[col]])
        prediction[is.na(prediction) | prediction == ""] <- "NOT SCORED"
        dat[[col]] <- prediction
    }

    ## Reconstruct table without aa columns or method.
    ## This information is in the column names.
    data.frame(rsid, dat[,c(2:3)], dat[,c(6:10)], dat[,c(13:17)], 
        dat[,c(20:24)], dat[,c(27:31)], stringsAsFactors=FALSE)

}
 
## Write the 'metadata' table describing this database.
##
## conn: open database connection to write to.
##
## The table has two character columns, 'name' and 'value', with one row
## per metadata item. Prints a progress message when done.
.writeMetadata <- function(conn)
{
    ## Look the version up once and reuse it. packageDescription() reads
    ## only the RSQLite DESCRIPTION file, whereas installed.packages()
    ## scans every installed package.
    rsqlite_version <- packageDescription("RSQLite", fields="Version")
    mat <- matrix(c(
        "Db type", "SIFTDb",
        "Data source", "SIFT",
        "Genome", "hg19",
        "Genus and Species", "Homo sapiens",
        "Resource URL",
            "http://sift.jcvi.org/",
        "dbSNP build", "132",
        ## svn.time() is defined outside this block; presumably it
        ## returns a timestamp string -- confirm against its definition.
        "Creation time",   svn.time(),
        "RSQLite version at creation time",
            rsqlite_version,
        "package", "VariantAnnotation"),
        ncol=2, byrow=TRUE
    )
    colnames(mat) <- c("name", "value")
    metadata <- data.frame(name=mat[ , "name"], value=mat[ , "value"],
        stringsAsFactors=FALSE)
    dbWriteTable(conn, "metadata", metadata, row.names=FALSE)
    cat("done with 'metadata' table \n")
}

## Create and populate the 'siftdata' table from a SIFT prediction file.
##
## conn: open database connection to write to.
## file: path passed to .loadSIFT().
##
## Rows are inserted positionally, so the column order of the data.frame
## returned by .loadSIFT() must match the column order of the table; the
## RES* columns receive the ref_* values. Prints a progress message when
## done.
.writeSIFT <- function(conn, file)
{
    dat <- .loadSIFT(file)
    sql <- c(
        "CREATE TABLE siftdata (\n",
        " RSID INTEGER NOT NULL,\n",
        " PROTEINID TEXT,\n",
        " AACHANGE TEXT,\n",
        " SNPPREDICTIONBH TEXT,\n",
        " SNPSCOREBH REAL,\n",
        " SNPMEDIANBH REAL,\n",
        " SNPPOSITIONSEQSBH INTEGER,\n",
        " SNPTOTALSEQSBH INTEGER,\n",
        " RESPREDICTIONBH TEXT,\n",
        " RESSCOREBH REAL,\n",
        " RESMEDIANBH REAL,\n",
        " RESPOSITIONSEQSBH INTEGER,\n",
        " RESTOTALSEQSBH INTEGER,\n",
        " SNPPREDICTIONAH TEXT,\n",
        " SNPSCOREAH REAL,\n",
        " SNPMEDIANAH REAL,\n",
        " SNPPOSITIONSEQSAH INTEGER,\n",
        " SNPTOTALSEQSAH INTEGER,\n",
        " RESPREDICTIONAH TEXT,\n",
        " RESSCOREAH REAL,\n",
        " RESMEDIANAH REAL,\n",
        " RESPOSITIONSEQSAH INTEGER,\n",
        " RESTOTALSEQSAH INTEGER \n",
        ")")
    dbGetQuery(conn, paste(sql, collapse=""))
 
    ## One placeholder per column of 'dat'.
    qmarks <- paste(rep("?", ncol(dat)), collapse=",")
    sql <- paste("INSERT INTO siftdata VALUES (", qmarks, ")", sep="") 
    .dbAddData(conn, sql, dat)

    ## Build the index after the bulk load so it is constructed once
    ## rather than updated for every inserted row.
    dbGetQuery(conn, "CREATE INDEX rsid_index on siftdata (RSID)")
    cat("done with 'siftdata' table \n")
}

## Build a SIFT SQLite database.
##
## filepath:  not used by this function body.
## savepath:  not used by this function body.
## dbfile:    path of the SQLite database file to create or open.
## dbsnpfile: SIFT prediction file passed to .writeSIFT().
## ...:       ignored; a warning is issued if anything is supplied.
##
## Writes the 'siftdata' and 'metadata' tables and prints a completion
## message. The connection is closed even if one of the steps fails.
makeSIFTDb <- function(filepath, savepath, dbfile, dbsnpfile, ...) 
{
    if (length(list(...)) != 0L)
        warning("extra args are ignored for now")

    conn <- dbConnect(SQLite(), dbname=dbfile)
    ## Guarantee the connection is released on error as well as success.
    on.exit(dbDisconnect(conn), add=TRUE)
    .writeSIFT(conn, dbsnpfile)
    .writeMetadata(conn)
    cat(paste("creation of ", basename(dbfile), " complete \n", sep=""))
}




