#!/usr/bin/env ruby
#
#  tw2td.rb - Script that fetches past tweets and posts them to tDiary
#
#   (c) 2012, Norihisa Washitake http://washitake.com/diary/public/
#

require 'rubygems'
require 'twitter'
require 'uri'
require 'cgi'
require 'net/http'

script_path = File::dirname($0)

require "#{script_path}/lib/thumbnails"
require "#{script_path}/lib/parse_args"
require "#{script_path}/lib/cache"
load    "#{script_path}/tw2td.conf"

# Reopen Twitter::Status to add a public reader for its `attrs` value.
# tweet2html reads t.attrs["retweeted_status"] and t.attrs["entities"]
# through this reader (presumably the raw API response hash -- confirm
# against the installed twitter gem version).
module Twitter
  class Status
    attr_reader :attrs
  end
end


# Maximum number of tweets to fetch per request (200 is the largest allowed)
$twitter_maximum_count = 100

# The Twitter API cannot serve 150 or more requests per hour
#$twitter_sleep = 3600 / 150
$twitter_sleep = 5

# Default values for the command-line options, handed to parse_args together
# with ARGV.
default_opts = {
  :debug    => false,
  :verbose  => false,
  :no_tweet => false,
  :days     => 1,
  :max_id   => 0,
  :cont     => false
}
$cmd_opts = parse_args(ARGV, default_opts)

# Starting tweet ID: taken from the cache when continuing a previous run,
# otherwise from the :max_id option.
if $cmd_opts[:cont] then
  $lastid = read_cache()
else
  $lastid = $cmd_opts[:max_id]
end

# Print a debug line, indented by `level` spaces, but only when the :debug
# option is set. Produces no output otherwise.
def debug(txt, level=2)
  return unless $cmd_opts[:debug]

  indent = " " * level
  puts indent + txt
end

# Print a progress message when either the :debug or the :verbose option
# is set.
def verbose(txt)
  chatty = $cmd_opts[:debug] || $cmd_opts[:verbose]
  return unless chatty

  puts txt
end
  

# Build a hyperlink to `url` labelled `text` in the configured diary format.
#
# In 'wiki' format the result is wiki link markup, unless force_html is
# true, in which case an HTML anchor wrapped in {{'...'}} is produced.
# In any other format a plain HTML anchor is returned.
def make_link(url, text, force_html=false)
  unless $diary_format == 'wiki'
    return %Q{<a href="#{url}">#{text}</a>}
  end

  if force_html
    "{{'<a href=\"#{url}\">#{text}</a>'}}"
  else
    %Q{[[#{text}|#{url}]]}
  end
end

# Wrap `html` as one list item: a "* " bullet in 'wiki' format, an <li>
# element otherwise.
def itemize(html)
  ($diary_format == 'wiki') ? ("* " + html) : ("<li>" + html + "</li>")
end

# Shorten a URL for display.
#
# Removes a leading "http://" or "https://" and an optional literal "www."
# prefix. If the remainder is longer than 30 characters it is cut to its
# first 28 characters followed by "...".
#
# url - the URL String to shorten.
#
# Returns the display String.
def make_display_url(url)
  # The dot after "www" is escaped so that only a real "www." host prefix is
  # removed (an unescaped dot would also eat e.g. the "wwwx" of
  # "wwwx.example.com"). \A anchors at the very start of the string.
  ret = url.sub(/\Ahttps?:\/\/(www\.)?/, '')
  return ret[0..27] + "..." if ret.length > 30
  ret
end

# Return `part` ready for embedding as raw HTML: wrapped in {{'...'}} when
# the diary format is "wiki", unchanged otherwise.
def direct_html(part)
  return part unless $diary_format == "wiki"

  "{{'" + part + "'}}"
end

# Build the <img> tag for a reply or retweet marker.
#
# rert - label String; "RE" in any letter case selects $reply_icon, any
#        other value selects $retweet_icon. The label is also used as the
#        alt text, in square brackets.
def icon_img(rert)
  icon = if rert.upcase == "RE"
           $reply_icon
         else
           $retweet_icon
         end
  %Q{<img src="#{icon}" alt="[#{rert}]" border="0" style="border: 0" />}
end

# Render one tweet as a single diary list item.
#
# The item consists of: a permalink labelled with the tweet's HH:MM time,
# an optional retweet icon, an optional reply icon linking to the tweet
# being answered, the tweet text with URLs and @mentions turned into links,
# and finally (after a line break) any attached media / thumbnail links.
#
# t - tweet object. This method reads id, created_at, text, attrs,
#     in_reply_to_status_id, in_reply_to_screen_name and media from it.
#
# Returns the String produced by itemize.
def tweet2html(t)
  debug("Entered tweet2html.", 0)
  debug("ID:   " + t.id.to_s)
  debug("Date: " + t.created_at.to_s)
  debug("Text: " + t.text[0..70])

  retweeted = t.attrs["retweeted_status"] || false
  html = make_link(
           "https://twitter.com/#!/#{$screen_name}/status/#{t.id}",
           t.created_at.strftime("%H:%M")
         ) + " "

  if retweeted then
    html += direct_html(icon_img("RT")) + " "
  end

  if t.in_reply_to_status_id != nil then
    link = "https://twitter.com/#!/#{t.in_reply_to_screen_name}/status/#{t.in_reply_to_status_id.to_s}"
    html += make_link(link, icon_img("Re"), true) + " "
  end

  # Maps a URL as it appears in the tweet text to [expanded_url, display_url].
  url_table = {}

  # Attached media (images etc.)
  medias    = []
  t.media.each { |m|
    url  = m.url
    murl = m.media_url
    xurl = m.expanded_url
    durl = m.display_url
    w    = m.sizes["medium"]["w"]
    h    = m.sizes["medium"]["h"]
    url_table[url] = [xurl, durl]
    tag = make_link(xurl,
      %Q{<img src="#{murl}:medium" width="#{w}" height="#{h}" border="0" />},
      true
    )
    debug("Found tagged media: #{murl}")
    medias << tag
  } if t.media

  # Shortened URLs (only those listed in the tweet's own "entities" data)
  entities = t.attrs["entities"]
  urls     = entities ? entities["urls"] : nil
  if urls != nil then
    urls.each do |uset|
      url  = uset["url"] || ""
      xurl = uset["expanded_url"] || ""
      durl = uset["display_url"] || ""
      if xurl.length > 0 and durl.length > 0 then
        url_table[url] = [xurl, durl]
      end
    end
  end

  urire = URI.regexp(%w[http https ftp])
  idre  = /@([0-9A-Za-z_]{1,15})(?![0-9A-Za-z_])/
  # A match counts as a mention only when the WHOLE match is "@name".
  # Testing with the unanchored idre would also fire on URLs that merely
  # contain an "@" (e.g. https://medium.com/@user/post or ftp://user@host)
  # and turn them into broken twitter.com links.
  mention = /\A#{idre}\z/

  txt = t.text.gsub(/\n/, "")
  debug(txt)
  html += txt.gsub(/#{urire}|#{idre}/) {|m|
    if mention.match(m) then
      debug("Found an ID link: #{m}")
      make_link("https://twitter.com/#!/#{m}", m)
    else
      durl = m
      if url_table[m] != nil then
        debug("Expanding URL #{m} => #{url_table[m][0]}")
        durl = url_table[m][1]
        m    = url_table[m][0]
      end
      thumb = get_thumb_url(m)
      if thumb != "" then
        # A thumbnail image is available for this URL
        debug("Found a thumbnail link for #{m} => #{thumb}")
        medias << make_link(m, thumb, true)
      end
      make_link(m, make_display_url(durl))
    end
  }

  if medias.size > 0 then
    html = html + direct_html('<br />') + medias.join(" ")
  end

  html = itemize(html)
  debug("Final format = #{html}")

  return html
end


# Post one day's worth of tweets to tDiary.
# Basically copied from 'posttdiary.rb' which can be found at
# http://docs.tdiary.org/users/?posttdiary.rb
#
# The tweets are rendered with tweet2html, newest first, under
# $diary_heading and appended to the diary entry for `day` by an HTTP POST
# to $tdiary_url using Basic authentication. The page is fetched first so
# that a csrf_protection_key hidden field, when present, can be echoed back.
# After the POST, update_cache($lastid) is called.
#
# When the :no_tweet option is set nothing is sent; only a message is
# printed via verbose. When `tweets` is empty nothing happens at all.
#
# day    - date of the diary entry (responds to year, month, day, strftime).
# tweets - Array of tweet objects (each must respond to created_at).
def post_tweets(day, tweets)
  debug("Entered post_tweets.", 0)

  if tweets.size > 0 then
    html_data = $diary_heading + "\n"
    tweets.sort {|x,y| x.created_at <=> y.created_at }.reverse.each do |t|
      html_data << tweet2html(t) << "\n"
    end

    data = "year=#{day.year}&month=#{day.month}&day=#{day.day}"
    data << "&body=#{CGI::escape html_data}"
    data << "&append=true"
    data << "&makerss_update=false"

    uri = URI.parse($tdiary_url)
    inre = /<input type="hidden" name="csrf_protection_key" value="([^"]+)">/

    Net::HTTP.start(uri.host, uri.port) do |http|
      # pack('m') breaks its output into lines of 60 characters, so
      # credentials longer than 45 bytes would put line breaks inside the
      # header value. Remove every newline, not just the trailing one.
      auth = ["#{$tdiary_uname}:#{$tdiary_pass}"].pack('m').delete("\r\n")
      # The "res, =" form also works if get/post return an Array, as the
      # script this was copied from expects.
      res, = http.get(uri.path,
                      "Authorization" => "Basic #{auth}",
                      "Referer" => $tdiary_url)

      if inre =~ res.body then
        data << "&csrf_protection_key=#{CGI::escape(CGI::unescapeHTML($1))}"
      end

      res, = http.post(uri.path, data,
                       "Authorization" => "Basic #{auth}",
                       "Referer" => $tdiary_url)
      update_cache($lastid)
    end unless $cmd_opts[:no_tweet]

    if $cmd_opts[:no_tweet] then
      verbose(day.strftime("Suppressed posting for %Y-%m-%d."))
    else
      verbose(day.strftime("Posted tweets on %Y-%m-%d."))
    end
  end
end

##
# Main processing starts here.
#
# Walks the timeline of $screen_name from newest to oldest, collects the
# tweets that fall inside a one-day window [daystart, dayend) and hands each
# completed day to post_tweets. The window then moves back in time until the
# number of days given by the :days option has been handled (or without
# limit when :cont is set).
##

MultiJson.engine = :ok_json

if $lastid > 0 then
  # Starting from a known tweet ID: the window is not known yet. 0 is a
  # placeholder that is replaced by the day of the first tweet retrieved.
  dayend   = 0
  daystart = 0
else
  # Default window: from local midnight yesterday to local midnight today.
  dayend   = Time.local(*(Time.now).to_a[3..5].reverse)
  daystart = dayend - 86400
end

data = []     # tweets collected for the day currently being assembled
cont = true   # becomes false once the requested number of days is done
days = 0      # number of days handled so far

begin
  debug("Retrieving tweets.", 0)
  tweet_opts = {:include_entities => true, :include_rts => true}
  tweet_opts[:max_id] = $lastid - 1 if $lastid > 0
  tweet_opts[:count] = $twitter_maximum_count if $cmd_opts[:days] == 0

  tester = Twitter.user_timeline($screen_name, tweet_opts)
  if tester.size == 0 then
    # NOTE(review): raising here skips the final post_tweets call at the
    # bottom of the script, so tweets already collected in `data` are not
    # posted -- confirm this is intended.
    raise "Retrieved no status on the timeline. We may be at the end."
  end

  tester.each do |t|

    if (daystart == 0) then
      daystart = Time.local(*(t.created_at).to_a[3..5].reverse)
      dayend   = daystart + 86400
      debug("Setting #{daystart} as an epoch.")
    end

    # NOTE(review): post_tweets caches $lastid, which at that moment is the
    # ID of `t` even though `t` itself has not been posted yet. Since a run
    # resumes from $lastid - 1, verify that --cont does not skip this tweet.
    $lastid = t.id
    filtered = $filter_phrases.any? {|f| t.text.match(f) }

    if dayend <= t.created_at then
      # Tweet from today (newer than the window): nothing to do
    elsif daystart <= t.created_at then
      # Tweet inside the window
      data << t unless filtered
    else
      # Tweet older than the window: the current day is complete
      post_tweets(daystart, data)
      days = days + 1
      data = []

      # Move the window back one day at a time until it contains t. Moving
      # it back only once would, after a gap of more than one day without
      # tweets, file t (and the tweets after it) under the wrong dates.
      # Every skipped empty day counts towards the :days limit.
      while ($cmd_opts[:cont] || days < $cmd_opts[:days])
        dayend   = dayend   - 86400
        daystart = daystart - 86400
        break if daystart <= t.created_at
        days = days + 1
      end

      if daystart <= t.created_at then
        data = filtered ? [] : [t]
      else
        # The day limit was reached before the window got to t
        cont = false
        break
      end
    end
  end
  if cont then
    debug("Sleeping for #{$twitter_sleep} seconds...")
    sleep $twitter_sleep
  end
end while (cont)

# Post whatever is still pending. post_tweets takes exactly two arguments.
if data.size > 0 then
  post_tweets(daystart, data)
  days = days + 1
end
