lib/UR/DataSource/CSV.pm

package UR::DataSource::CSV;
use strict;
use warnings;

# There are still a few issues with actually using this thing:
#
# when running
# ur define datasource rdbms --dsn DBD:CSV:f_dir=/tmp/trycsv
# the f_dir=... part doesn't get put in as a "server" attribute.
# You have to add in it by hand as sub server {}
#
# after ur update classes, there aren't any id properties defined
# for your new classes, because there's no conclusive way to pick
# the right one - no unique constraints
#
# _get_sequence_name_for_table_and_column() and _get_next_value_from_sequence()
# aren't implemented yet, so creating new entities and sync_databases
# won't work
#
# There's a bug in even the latest SQL::Statement on CPAN where the processing
# of JOIN clauses uses a case sensitive match against upper-case stuff, when it
# should be lower-case.  It also cannot handle more than one join in the same
# statement
#
# with that out of the way... on to the show!

require UR;
our $VERSION = "0.392"; # UR $VERSION;

use File::Basename;

UR::Object::Type->define(
    class_name => 'UR::DataSource::CSV',
    is => ['UR::DataSource::RDBMS'],
    is_abstract => 1,
);

# RDBMS API

sub driver { "CSV" }  

sub owner { 
    undef
}

sub login {
    undef
}

sub auth {
    undef
}


sub path { 
    my $server = shift->server;

    my @server_opts = split(';', $server);
    foreach my $opt ( @server_opts )  {
        my($key,$value) = split('=',$opt);
        if ($key eq 'f_dir') {
            return $value;
        }
    }

    return;
}
    

sub can_savepoint { 0;}  # Dosen't support savepoints

sub _dbi_connect_args {
    my $self = shift;

    my @connection = $self->SUPER::_dbi_connect_args(@_);
    delete $connection[3]->{'AutoCommit'};   # DBD::CSV doesn't support autocommit being off
    return @connection;
}


sub _get_sequence_name_for_table_and_column {
    Carp::croak("Not implemented yet");

    my $self = shift->_singleton_object;
    my ($table_name,$column_name) = @_;
    
    my $dbh = $self->get_default_handle();
    
    # See if the sequence generator "table" is already there
    my $seq_table = sprintf('URMETA_%s_%s_seq', $table_name, $column_name);
    unless ($self->{'_has_sequence_generator'}->{$seq_table} or
            grep {$_ eq $seq_table} $self->get_table_names() ) {
        unless ($dbh->do("CREATE TABLE IF NOT EXISTS $seq_table (next_value integer PRIMARY KEY AUTOINCREMENT)")) {
            die "Failed to create sequence generator $seq_table: ".$dbh->errstr();
        }
    }
    $self->{'_has_sequence_generator'}->{$seq_table} = 1;

    return $seq_table;
}

sub _get_next_value_from_sequence {
    Carp::croak('Not implemented yet');

    my($self,$sequence_name) = @_;

    my $dbh = $self->get_default_handle();

    # FIXME can we use a statement handle with a wildcard as the table name here?
    unless ($dbh->do("INSERT into $sequence_name values(null)")) {
        die "Failed to INSERT into $sequence_name during id autogeneration: " . $dbh->errstr;
    }

    my $new_id = $dbh->last_insert_id(undef,undef,$sequence_name,'next_value');
    unless (defined $new_id) {
        die "last_insert_id() returned undef during id autogeneration after insert into $sequence_name: " . $dbh->errstr;
    }

    unless($dbh->do("DELETE from $sequence_name where next_value = $new_id")) {
        die "DELETE from $sequence_name for next_value $new_id failed during id autogeneration";
    }

    return $new_id;
}


# Given a table name, return the complete pathname to it
# As ur update classes calls this, $table has been uppercased
# already (because most data sources uppercase table names),
# so we need to figure out which file they're talking about
sub _find_pathname_for_table {
    my $self = shift;
    my $table = shift;

    my $path = $self->path;

    my @all_files = glob("$path/*");
    # note: this only finds the first one
    foreach my $pathname ( @all_files ) {
        if (File::Basename::basename($pathname) eq $table) {
            return $pathname;
        }
    }

    return;
}

# column_info doesn't work against a DBD::CSV handle
sub get_column_details_from_data_dictionary {
    my($self,$catalog,$schema,$table,$column) = @_;

    # Convert the SQL wildcards to glob wildcards
    $table =~ tr/%_/*?/;

    # Convert the SQL wildcards to regex wildcards
    $column =~ s/%/\\w*/;
    $column =~ s/_/\\w/;
    my $column_regex = qr($column);

    my(@matching_files) = $self->_find_pathname_for_table($table);
    my @found_columns;
    foreach my $file ( @matching_files ) {
        my $table_name = File::Basename::basename($file);
        my $fh = IO::File->new($file);
        unless ($fh) {
            $self->warning_message("Can't open file $file for reading: $!");
            next;
        }

        my $header = $fh->getline();
        $header =~ s/\r|\n//g;  # Remove newline/CR
        
        my @columns = split($self->get_default_handle->{'csv_sep_char'} ||',' , $header);
        my $column_order = 0;
        foreach my $column_name ( @columns ) {
            $column_order++;
            next unless $column_name =~ m/$column_regex/;

            push @found_columns, { TABLE_CAT => $catalog,
                                   TABLE_SCHEM => $schema,
                                   TABLE_NAME => $table_name,
                                   COLUMN_NAME => $column_name,
                                   DATA_TYPE => 'STRING',   # what else could we put here?
                                   TYPE_NAME => 'STRING',
                                   NULLABLE => 1,           # all columns are nullable in CSV files
                                   IS_NULLABLE => 'YES',
                                   REMARKS => '',
                                   COLUMN_DEF => '',
                                   SQL_DATA_TYPE => '',  # FIXME shouldn't this be something related to DATA_TYPE
                                   SQL_DATETIME_SUB => '',
                                   CHAH_OCTET_LENGTH => undef,  # FIXME this should be the same as column_size, right?
                                   ORDINAL_POSITION => $column_order,
                                }
        }
    }

    my $sponge = DBI->connect("DBI:Sponge:", '','')
        or return $self->get_default_handle->set_err($DBI::err, "DBI::Sponge: $DBI::errstr");

    my @returned_names = qw( TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME COLUMN_SIZE
                             BUFFER_LENGTH DECIMAL_DIGITS NUM_PREC_RADIX NULLABLE REMARKS COLUMN_DEF
                             SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH ORDINAL_POSITION IS_NULLABLE );
    my $returned_sth = $sponge->prepare("column_info $table", {
        rows => [ map { [ @{$_}{@returned_names} ] } @found_columns ],
        NUM_OF_FIELDS => scalar @returned_names,
        NAME => \@returned_names,
    }) or return $self->get_default_handle->set_err($sponge->err(), $sponge->errstr());

    return $returned_sth;
}


# DBD::CSV doesn't support foreign key tracking
# returns a statement handle with no data to read
sub get_foreign_key_details_from_data_dictionary {
my($self,$fk_catalog,$fk_schema,$fk_table,$pk_catalog,$pk_schema,$pk_table) = @_;

    my $sponge = DBI->connect("DBI:Sponge:", '','')
        or return $self->get_default_handle->DBI::set_err($DBI::err, "DBI::Sponge: $DBI::errstr");

    my @returned_names = qw( FK_NAME UK_TABLE_NAME UK_COLUMN_NAME FK_TABLE_NAME FK_COLUMN_NAME );
    my $table = $pk_table || $fk_table;
    my $returned_sth = $sponge->prepare("foreign_key_info $table", {
        rows => [],
        NUM_OF_FIELDS => scalar @returned_names,
        NAME => \@returned_names,
    }) or return $self->get_default_handle->DBI::set_err($sponge->err(), $sponge->errstr());

    return $returned_sth;
}


# DBD::CSV dosen't support bitmap indicies, so there aren't any
sub get_bitmap_index_details_from_data_dictionary {
    return [];
}


# DBD::CSV doesn't support unique constraints
sub get_unique_index_details_from_data_dictionary {
    return {};
}

sub get_table_details_from_data_dictionary {
    my($self,$catalog,$schema,$table,$type) = @_;
    
    # DBD::CSV's table_info seems to always give you back all the "tables" even
    # if you only asked for details on one of them
    my $sth = $self->SUPER::get_table_details_from_data_dictionary($catalog,$schema,$table,$type);

    # Yeah, it's kind of silly to have to read in all the data and repackage it
    # back into another sth
    my @returned_details;
    while (my $row = $sth->fetchrow_arrayref()) {
        next unless ($row->[2] eq $table);
        push @returned_details, $row;
    }
        
    my $sponge = DBI->connect("DBI:Sponge:", '','')
        or return $self->get_default_handle->DBI::set_err($DBI::err, "DBI::Sponge: $DBI::errstr");

    my @returned_names = qw( TABLE_CAT TABLE_SCHEM TABLE_NAME TABLE_TYPE REMARKS );
    my $returned_sth = $sponge->prepare("table_info $table", {
        rows => \@returned_details,
        NUM_OF_FIELDS => scalar @returned_names,
        NAME => \@returned_names,
    }) or return $self->get_default_handle->DBI::set_err($sponge->err(), $sponge->errstr());

    $returned_sth;
}



# By default, make a text dump of the database at commit time.
# This should really be a datasource property
sub dump_on_commit {
    0;
}


1;


=pod

=head1 NAME

UR::DataSource::CSV - Parent class for data sources using DBD::CSV

=head1 DESCRIPTION

UR::DataSource::CSV is a subclass of L<UR::DataSource::RDBMS> and can be
used for interacting with CSV files.  Because of the limitations of the
underlying modules (such as SQL::Statement only supporting one join at a
time), this module is deprecated.

L<UR::DataSource::File> implements a non-SQL interface for data files, and
is the proper way to use a file as a data source for class data.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)