# manual/import.rb

require 'nokogiri'
require 'fileutils'
require 'open-uri'

# Address of the Drupal book export, and the local file it is saved to.
# The export is only downloaded when FILENAME does not exist yet.
URL = 'http://ardour.org/book/export/html/5848'.freeze
FILENAME = 'drupal-export.html'.freeze

# Switches read by `process`:
#   WRITE                 - write the generated page files.
#   DOWNLOAD_FILES        - fetch the targets of `/files/...` links.
#   GET_ARDOUR_ORG_IMAGES - fetch images whose src starts with `/`.
#   HANDLE_OTHER_IMAGES   - fetch images whose src starts with `http`.
WRITE = true
DOWNLOAD_FILES = false
GET_ARDOUR_ORG_IMAGES = false
HANDLE_OTHER_IMAGES = false

# Directory the generated pages are written under.
OUTPUT_DIR = '_manual'.freeze

# Directory that downloaded files and images are stored under.
FILES_DIR = 'source'.freeze

# Link slugs that are replaced by another slug before being looked up.
SLUG_MAPPINGS = {
    'working_with_sessions' => 'sessions',
    'export_stem' => 'export',
    'track_groups' => 'track_bus_groups',
    'vst_support' => 'windows_vst',
    'kbd_default' => 'default_bindings',
    'midistep_entry' => 'midi_step_entry',
    'midi_stepentry' => 'midi_step_entry'
}.freeze

# Link slugs that are never looked up; `link_slug_to_node_id` returns nil
# for them and `process` points such links at `/missing`.
MISSING_SLUGS = %w(
    range_selection
    track_templates
    track_template
    color_dialog
    region_layering
    round_robin_inputs
    mcp_osx
    mcp_new_device
).freeze

# Replacement paths for `/files/...` links (applied only when DOWNLOAD_FILES
# is set).
FILES_MAPPINGS = {
    '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
    '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
}.freeze

# In-memory slug => node id memo filled by `link_slug_to_node_id`.
# Deliberately NOT frozen: it is written to at runtime.
LINK_SLUG_TO_NODE_ID = {}

# Resolves a manual link slug to its numeric Drupal node id.
#
# The slug is first translated through SLUG_MAPPINGS. Slugs listed in
# MISSING_SLUGS are not looked up at all. Results are memoized in
# LINK_SLUG_TO_NODE_ID and persisted in `tmp/slug-to-node/<slug>`, so the
# remote page is fetched at most once per slug.
#
# slug - the part of a `/manual/<slug>` link after the prefix.
#
# Returns the node id as an Integer, or nil for a slug in MISSING_SLUGS.
# Raises RuntimeError when the fetched page has no `#content .node` element.
def link_slug_to_node_id(slug)
    slug = SLUG_MAPPINGS[slug] || slug

    return nil if MISSING_SLUGS.include? slug

    LINK_SLUG_TO_NODE_ID[slug] ||= begin
        filename = "tmp/slug-to-node/#{slug}"

        # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
        if File.exist? filename
            File.read(filename).to_i
        else
            url = "http://ardour.org/manual/#{slug}"
            puts "opening #{url}"

            # URI.open rather than Kernel#open: opening URLs through Kernel#open
            # stopped working in Ruby 3.0. The block form closes the handle.
            node = URI.open(url) { |page| Nokogiri(page).at('#content .node') }
            raise "no '#content .node' element found at #{url}" unless node

            node_id = node['id'].sub(/^node\-/, '').to_i

            # Make sure the cache directory exists even when this is called
            # before the script's own setup has created it.
            FileUtils.mkdir_p File.dirname(filename)
            File.open(filename, 'w+') { |f| f << node_id }
            node_id
        end
    end
end


# Records the site path of a node in `tmp/node-to-path/<node_id>`.
#
# An existing record is left untouched, so the first path registered for a
# node id wins.
#
# node_id - node id used as the file name.
# path    - path string stored as the file's content.
def register_node(node_id, path)
    filename = "tmp/node-to-path/#{node_id}"
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    File.open(filename, 'w+') { |f| f << path } unless File.exist? filename
end

# Looks up the path previously stored by `register_node` for a node id.
#
# node_id - node id whose record in `tmp/node-to-path/` is read.
#
# Returns the stored path, or an empty string when nothing has been
# registered for the node (a strict variant that raised instead is disabled).
# With an empty result, `process` ends up linking to "/".
def node_id_to_path!(node_id)
    filename = "tmp/node-to-path/#{node_id}"
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    return '' unless File.exist? filename
    #raise "no path for node-id #{node_id}" unless File.exist? filename
    File.read(filename)
end

# Downloads +url+ and stores the response body at +dest+.
#
# Parent directories of +dest+ are created as needed. The body is fetched
# before the file is opened and written in binary mode, since the targets
# are images and PDFs.
def download_to(url, dest)
    FileUtils.mkdir_p File.dirname(dest)
    # URI.open: opening URLs through Kernel#open stopped working in Ruby 3.0.
    URI.open(url) { |remote| File.binwrite(dest, remote.read) }
end

# Rewrites the `a` elements inside +content+ in place.
#
# `/manual/<slug>` links are pointed at the registered path of the target
# node, or at `/missing` when the slug has no node id. `/files/...` links are
# only touched when DOWNLOAD_FILES is set: the path is translated through
# FILES_MAPPINGS and the file is downloaded below FILES_DIR.
#
# this_path - slugs of the page being processed (used for log output only).
def rewrite_links(content, this_path)
    content.search('a').each do |a|
        case a['href']
        when %r{^/manual/(.*)}
            # Dedicated local names: the lookup must not overwrite the slug and
            # node id of the page that is currently being processed.
            link_node_id = link_slug_to_node_id(Regexp.last_match(1))
            a['href'] = link_node_id ? "/#{node_id_to_path!(link_node_id)}" : '/missing'
        when %r{^(/files/.*)}
            next unless DOWNLOAD_FILES

            file_path = Regexp.last_match(1)
            if FILES_MAPPINGS[file_path]
                file_path = FILES_MAPPINGS[file_path]
                a['href'] = file_path
            end

            puts "downloading [#{file_path}] (for #{this_path.join('/')})"

            # file_path already starts with "/", so no extra separator is added.
            download_to("http://ardour.org#{file_path}", "#{FILES_DIR}#{file_path}")
        end
    end
end

# Rewrites the `img` elements inside +content+ in place.
#
# Sources starting with `/` are left as they are and fetched from ardour.org
# when GET_ARDOUR_ORG_IMAGES is set. Sources starting with `http` have their
# scheme and host stripped so they become root-relative, and are fetched when
# HANDLE_OTHER_IMAGES is set.
def rewrite_images(content)
    content.search('img').each do |img|
        src = img['src']

        case src
        when %r{^/}
            next unless GET_ARDOUR_ORG_IMAGES

            url = "http://ardour.org#{src}"
            puts "getting #{url}"
            download_to(url, "#{FILES_DIR}#{src}")
        when /^http/
            # https? so that TLS image URLs are made root-relative as well.
            new_src = '/' + src.sub(%r{^https?://[^/]+/}, '')
            img['src'] = new_src

            next unless HANDLE_OTHER_IMAGES

            puts "new_src: #{new_src}"
            puts "getting #{src}"
            download_to(src, "#{FILES_DIR}#{new_src}")
        end
    end
end

# Recursively converts the sections of a Drupal book export into page files.
#
# For every `div.section-<level>` found under +html+ this
#   * derives a slug from the text of the section's `h1.book-heading`,
#   * registers the node id => path mapping via `register_node`,
#   * rewrites links and image sources in a copy of the section from which
#     the book heading and the nested sections have been removed,
#   * writes the page (front matter followed by the HTML) when WRITE is set,
#   * and then recurses into the section with level + 1.
#
# The section whose slug is `the-ardour3-manual` is treated as the root: it
# contributes nothing to the paths and is written to `blah.html`.
#
# html          - node responding to #search (a Nokogiri document or element).
# level         - nesting depth used to build the `div.section-N` selector.
# path          - slugs of the ancestor sections.
# numbered_path - ancestor slugs, each prefixed with its two-digit position.
def process(html, level = 1, path = [], numbered_path = [])
    html.search("div.section-#{level}").each_with_index do |child, i|
        title = child.at('h1.book-heading').inner_text
        node_id = child['id'].sub(/^node\-/, '')
        slug = title.downcase.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
        root = slug == 'the-ardour3-manual'

        if root
            # top level
            this_path = []
            this_numbered_path = []
        else
            # i is the position among the sections matched at this level.
            this_path = path + [slug]
            this_numbered_path = numbered_path + ["%02d_%s" % [i + 1, slug]]
        end

        register_node node_id, this_path.join('/')

        has_children = child.search("div.section-#{level + 1}").length > 0

        output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
        output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"

        # Work on a copy so that the nested sections are still present in
        # +child+ when recursing below.
        content = child.dup
        content.search('h1.book-heading').remove
        content.search("div.section-#{level + 1}").remove

        # Drop a leading h2 that merely repeats the page title.
        heading = content.at('h2')
        heading.remove if heading && heading.inner_text == title

        rewrite_links(content, this_path)
        rewrite_images(content)

        if WRITE
            FileUtils.mkdir_p output_dir if has_children
            # The page itself lives next to output_dir; make sure that
            # directory exists even when no ancestor has created it.
            FileUtils.mkdir_p File.dirname(output_file)

            File.open(output_file, 'w:UTF-8') do |f|
                f << <<-HTML
---
layout: default
title: #{title}
---

#{content.inner_html}
                HTML

                f << "{% children %}\n" if has_children
            end
        end

        process(child, level + 1, this_path, this_numbered_path)
    end
end


# Fetch the Drupal export once; later runs reuse the local copy.
# File.exist? / URI.open replace File.exists? (removed in Ruby 3.2) and
# Kernel#open on a URL (no longer supported since Ruby 3.0).
unless File.exist?(FILENAME)
    puts "downloading #{URL} to #{FILENAME}"
    # Download before creating the file, so a failed request does not leave an
    # empty export behind that later runs would mistake for a valid one.
    URI.open(URL) { |remote| File.write(FILENAME, remote.read) }
end

# Cache directories used by register_node / node_id_to_path! and
# link_slug_to_node_id.
FileUtils.mkdir_p('tmp/node-to-path')
FileUtils.mkdir_p('tmp/slug-to-node')

process Nokogiri(File.read(FILENAME))