manual/import.rb

230 lines
6.1 KiB
Ruby
Raw Normal View History

2013-01-29 19:26:36 -05:00
require 'nokogiri'
require 'fileutils'
require 'open-uri'
# --- Import configuration -------------------------------------------------
# The read-only values below are frozen so they cannot be mutated by accident.

# Drupal "book export" page that holds the whole manual as one HTML document.
URL = 'http://ardour.org/book/export/html/5848'.freeze
# Local copy of the export; it is only downloaded when this file is absent.
FILENAME = 'drupal-export.html'.freeze

# Switches controlling which side effects the import performs.
WRITE = true                   # write the generated pages below OUTPUT_DIR
DOWNLOAD_FILES = false         # fetch documents linked under /files/
GET_ARDOUR_ORG_IMAGES = false  # fetch images referenced with a site-relative src
HANDLE_OTHER_IMAGES = false    # fetch images referenced with an absolute http src

OUTPUT_DIR = '_manual'.freeze  # root directory for generated pages
FILES_DIR = 'source'.freeze    # root directory for downloaded files and images

# Link slugs used inside the manual => the slug to look up instead.
SLUG_MAPPINGS = {
  'working_with_sessions' => 'sessions',
  'export_stem' => 'export',
  'track_groups' => 'track_bus_groups',
  'vst_support' => 'windows_vst',
  'kbd_default' => 'default_bindings',
  'midistep_entry' => 'midi_step_entry',
  'midi_stepentry' => 'midi_step_entry'
}.freeze

# Slugs that are never looked up; links to them are rewritten to "/missing".
MISSING_SLUGS = %w(
  range_selection
  track_templates
  track_template
  color_dialog
  region_layering
  round_robin_inputs
  mcp_osx
  mcp_new_device
).freeze

# Linked file path => replacement path to link to (and download) instead.
FILES_MAPPINGS = {
  '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
  '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
}.freeze

# In-memory cache of slug => node id, filled in at runtime by
# link_slug_to_node_id. Deliberately NOT frozen.
LINK_SLUG_TO_NODE_ID = {}
# Resolves a manual link slug (the part after "/manual/") to a node id.
#
# The slug is first translated through SLUG_MAPPINGS; slugs listed in
# MISSING_SLUGS resolve to nil. Results are memoized in LINK_SLUG_TO_NODE_ID
# and cached on disk in tmp/slug-to-node/<slug>. On a cache miss the page
# http://ardour.org/manual/<slug> is fetched and the id is taken from the
# "id" attribute of its "#content .node" element (leading "node-" stripped).
#
# @param slug [String] slug taken from a "/manual/..." href
# @return [Integer, nil] the node id, or nil for a slug in MISSING_SLUGS
# @raise [RuntimeError] if the fetched page has no "#content .node" element
def link_slug_to_node_id(slug)
  slug = SLUG_MAPPINGS[slug] || slug
  return nil if MISSING_SLUGS.include?(slug)

  LINK_SLUG_TO_NODE_ID[slug] ||= begin
    filename = "tmp/slug-to-node/#{slug}"
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    if File.exist?(filename)
      File.read(filename).to_i
    else
      url = "http://ardour.org/manual/#{slug}"
      puts "opening #{url}"
      # Kernel#open no longer opens URLs on Ruby 3.0+, while URI.open does
      # not exist before Ruby 2.5 -- use whichever this interpreter offers.
      page = URI.respond_to?(:open) ? URI.open(url) : open(url)
      node = Nokogiri(page).at('#content .node')
      # Fail with a readable message instead of a NoMethodError on nil.
      raise "no '#content .node' element found at #{url}" unless node
      node_id = node['id'].sub(/^node\-/, '').to_i
      File.open(filename, 'w+') { |f| f << node_id }
      node_id
    end
  end
end
# Records the output path of a node in tmp/node-to-path/<node_id>.
#
# An existing record is left untouched, so the first registration wins.
#
# @param node_id [String, Integer] node id used as the record's file name
# @param path [String] path to store for the node (may be empty)
def register_node(node_id, path)
  filename = "tmp/node-to-path/#{node_id}"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  return if File.exist?(filename)

  File.open(filename, 'w+') { |f| f << path }
end
# Looks up the path recorded for a node by register_node.
#
# Despite the bang in the name, an unknown node does not raise: it yields
# an empty string (an earlier version raised; that check is disabled).
#
# @param node_id [String, Integer] node id to look up
# @return [String] contents of tmp/node-to-path/<node_id>, or '' if absent
def node_id_to_path!(node_id)
  filename = "tmp/node-to-path/#{node_id}"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  return '' unless File.exist?(filename)

  File.read(filename)
end
# Fetches +url+ and stores the response body at +dest+, creating parent
# directories as needed.
def download_to(url, dest)
  FileUtils.mkdir_p File.dirname(dest)
  # Kernel#open no longer opens URLs on Ruby 3.0+, while URI.open does not
  # exist before Ruby 2.5 -- use whichever this interpreter offers.
  body = URI.respond_to?(:open) ? URI.open(url, &:read) : open(url, &:read)
  # Fetch before opening the destination so a failed request does not leave
  # an empty file behind; binary mode keeps PDFs and images byte-exact.
  File.open(dest, 'wb') { |f| f << body }
end

# Rewrites the <a> elements of +content+ in place: "/manual/<slug>" links
# become links to the registered page path (or "/missing"), and "/files/..."
# links are remapped and downloaded when DOWNLOAD_FILES is set.
def rewrite_links(content, page_path)
  content.search('a').each do |a|
    case a['href']
    when /^\/manual\/(.*)/
      # Dedicated local names: the original reused (and clobbered) the
      # enclosing page's slug and node_id here.
      link_node_id = link_slug_to_node_id($1)
      a['href'] = link_node_id ? "/#{node_id_to_path!(link_node_id)}" : '/missing'
    when /^(\/files\/.*)/
      next unless DOWNLOAD_FILES

      file_path = $1
      if FILES_MAPPINGS[file_path]
        file_path = FILES_MAPPINGS[file_path]
        a['href'] = file_path
      end
      puts "downloading [#{file_path}] (for #{page_path.join('/')})"
      # file_path already starts with "/", so no extra separator is added.
      download_to "http://ardour.org#{file_path}", "#{FILES_DIR}#{file_path}"
    end
  end
end

# Handles the <img> elements of +content+ in place: site-relative images are
# optionally downloaded; absolute http(s) sources are rewritten to
# site-relative paths and optionally downloaded.
def localize_images(content)
  content.search('img').each do |img|
    src = img['src']
    case src
    when /^\//
      next unless GET_ARDOUR_ORG_IMAGES

      url = "http://ardour.org#{src}"
      puts "getting #{url}"
      download_to url, "#{FILES_DIR}#{src}"
    when /^http/
      # Strip scheme and host. "https?" so that https sources (which the
      # branch condition also matches) are stripped too.
      new_src = '/' + src.sub(/^https?:\/\/[^\/]+\//, '')
      img['src'] = new_src
      next unless HANDLE_OTHER_IMAGES

      puts "new_src: #{new_src}"
      puts "getting #{src}"
      download_to src, "#{FILES_DIR}#{new_src}"
    end
  end
end

# Recursively converts the "div.section-<level>" elements found under +html+
# into one page file each.
#
# For every section the method derives a slug from its "h1.book-heading"
# title, registers the node's path (register_node), strips the heading and
# nested sections from a copy of the element, rewrites links and images, and
# -- when WRITE is set -- writes the result with a YAML front-matter header
# to a file under OUTPUT_DIR. It then recurses into the section's children.
# The section whose slug is "the-ardour3-manual" is treated as the root: it
# contributes no path component and is written to OUTPUT_DIR/blah.html.
#
# @param html [Nokogiri node] document or element to search for sections
# @param level [Integer] nesting level of the sections to process
# @param path [Array<String>] slugs of the ancestor sections
# @param numbered_path [Array<String>] "NN_slug" names of the ancestors,
#   used to build output file names
def process(html, level = 1, path = [], numbered_path = [])
  html.search("div.section-#{level}").each_with_index do |child, i|
    title = child.at('h1.book-heading').inner_text
    node_id = child['id'].sub(/^node\-/, '')
    slug = title.downcase.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
    root = slug == 'the-ardour3-manual'

    if root
      # top level
      this_path = []
      this_numbered_path = []
    else
      # Position-prefixed name, e.g. "03_editing". (The original passed a
      # surplus node_id argument to the format string.)
      numbered_slug = "%02d_%s" % [i + 1, slug]
      this_path = path + [slug]
      this_numbered_path = numbered_path + [numbered_slug]
    end
    register_node node_id, this_path.join('/')

    has_children = !child.search("div.section-#{level + 1}").empty?
    output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
    output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"

    # Work on a copy so that removing the nested sections does not affect
    # the recursive call on +child+ below.
    content = child.dup
    content.search('h1.book-heading').remove
    content.search("div.section-#{level + 1}").remove
    # Drop a leading <h2> that merely repeats the page title.
    heading = content.at('h2')
    heading.remove if heading && heading.inner_text == title

    rewrite_links content, this_path
    localize_images content

    if WRITE
      FileUtils.mkdir_p output_dir if has_children
      File.open(output_file, 'w:UTF-8') do |f|
        # Heredoc bodies stay at column 0: the front matter must not be indented.
        f << <<-HTML
---
layout: default
title: #{title}
---
#{content.inner_html}
        HTML
        if has_children
          f << <<-HTML
{% children %}
          HTML
        end
      end
    end

    process(child, level + 1, this_path, this_numbered_path)
  end
end
# Script entry point: make sure the Drupal export is available locally,
# create the on-disk cache directories, then convert the export.

# File.exist? instead of File.exists?, which was removed in Ruby 3.2.
unless File.exist?(FILENAME)
  puts "downloading #{URL} to #{FILENAME}"
  # Kernel#open no longer opens URLs on Ruby 3.0+, while URI.open does not
  # exist before Ruby 2.5 -- use whichever this interpreter offers.
  # Fetch before creating the file: a failed download must not leave an
  # empty FILENAME behind, because later runs would reuse it as the export.
  export = URI.respond_to?(:open) ? URI.open(URL, &:read) : open(URL, &:read)
  File.open(FILENAME, 'w+') { |f| f << export }
end

FileUtils.mkdir_p('tmp/node-to-path')
FileUtils.mkdir_p('tmp/slug-to-node')

process Nokogiri(File.read(FILENAME))