pg2mysql



#!/usr/bin/env ruby

##
# Andrew Coleman <mercury at penguincoder dot org>
# Released under the GPLv2 http://gnu.org
# General purpose tranlator for pgsql -> mysql. Best used with Rails-like data.
#
# Use: pg_dump -DOx [dbname] > outputfile
# to get a proper dump for this program.
#
# Allows for CREATE TABLE, INSERT INTO, and CREATE INDEX translations.
# Keeps DEFAULT values and strips all Postgres style data casting (::).
# Also can interpret multi-line INSERT statements (for those that use newlines
# in text fields). The script will also include any columns from INHERIT tables
# at the end of the CREATE TABLE clause.
#
# General Usage:
#  ./pg2mysql.rb [input files] > mysql-compatible-dump
# also:
#  pg_dump -DOx [dbname] | ./pg2mysql.rb > mysql-compatible-dump
#
# Assumptions:
#  * field 'id' is serial primary key and is converted as such.
#  * all tables are converted to default type. modify configuration as such.
#

# if you use more data types than this, add them here.
# greedy matching. make sure the order in the array is right to get the
# right data conversions. just look at the ordering of the character match.
$conversion_table = [
  [ /integer|serial/, 'int(11)' ],
  [ /character\(([0-9]+)\)/, 'char($1)' ],
  [ /character varying\(([0-9]+)\)/, 'varchar($1)' ],
  [ /character varying/, 'tinytext' ],
  [ /timestamp with(out)? time zone/, 'timestamp null default null' ],
  [ /time with(out)? time zone/, 'time' ],
  [ /text/, 'text' ],
  [ /date/, 'date' ],
  [ /double precision/, 'double precision' ],
  [ /real/, 'double precision' ],
  [ /boolean/, 'tinyint(1)' ],
  [ /numeric\(([0-9,]+)\)/, 'decimal($1)' ]
]

##
# Returns a string for the converted MySQL type from the given Postgres
# data type. This will preserve the DEFAULT values also, but it does remove
# all data casting.
#
def convert_to_mysql_type(type)
  row = $conversion_table.detect { |r| r.first.match(type) }
  if row.nil?
    $stderr.puts "COULD NOT FIND MYSQL TYPE FOR '#{type}'"
    exit 1
  end
  res = type.gsub(row.first, row.last)
  res.gsub!('$1', $1) if $1
  if res =~ /varchar\(([0-9]+)\)/ and $1.to_i > 100
    res = 'mediumtext' # most hackish part of this program...
  end
  res
end

# flags and constants used in the loop
defining_table = false # knows to collect column names instead of other SQL
in_insert = false # flag for determining if it is a multi-line insert
table_name = nil # current name of the table
table_fields = {} # hash of column names grouped by table_name

$<.each do |line| # reads from stdin or all files given on the command line
  out_line = '' # the line to print out
  if defining_table
    next if line =~ /^\)$/ # skip for inherits
    if line =~ /^\);$/
      defining_table = false # finished defining table
      # join columns together... so you can skip the last comma.
      out_line = "#{table_fields[table_name].join(",\n")}\n);"
    elsif line =~ /^INHERITS \(([\w, ]+)\);/
      defining_table = false
      # add in inherited parent columns
      $1.split(/\, ?/).each { |p| table_fields[p].each { |f|
        next if table_fields[table_name].include?(f)
        table_fields[table_name] << f } }
      out_line = "#{table_fields[table_name].join(",\n")}\n);" # join cols
    else
      line.gsub!(/NULL::.+$/, 'NULL,') # remove postgres casting
      field = line.split.first # get the first word (column name)
      type = line.gsub(field, '').strip # remove the column name
      type.chop! if type =~ /\,$/ # remove the comma from the end
      mysql_type = convert_to_mysql_type(type) # convert to mysql type
      # for session_id, we need a limit
      mysql_type = 'varchar(255)' if field == 'session_id'
      fname = "  #{field} #{mysql_type}" # get output line for data type
      # assume that the 'id' field should be built just like this
      fname = '  id serial primary key' if field == 'id'
      table_fields[table_name] << fname # add the line to the data column array
    end
  elsif line =~ /^CREATE TABLE \"?(\w+)\"?/
    defining_table = true # flag to start defining a table
    table_name = $1 # set the table name from the regex
    table_fields[table_name] ||= [] # basic array of empty columns
    out_line = line
  elsif line =~ /^CREATE INDEX (\w+) ON (\w+) USING (\w+) \((\w+)\);$/
    # create an index on a column, have to flip the words around
    out_line = "CREATE INDEX #{$1} USING #{$3} ON #{$2} (#{$4});"
  elsif in_insert or line =~ /^INSERT INTO/
    in_insert = true # flag used for multi-line insert statements
    out_line = line # add a row into a table
    in_insert = false if line =~ /\);$/
  end
  # put out the line unless there is no line to process. also convert
  # postgres (") into mysql (`)
  # also, postgres has strings like this: E'some string'
  puts out_line.gsub('"', '`').gsub(/ [A-Z]\'/, " '") unless out_line.empty?
end