require 'nokogiri'
require 'fileutils'
require 'open-uri'

# Converts a single-page HTML book export into a tree of HTML pages with
# YAML front matter under OUTPUT_DIR, rewriting /manual/ links to the new
# page paths and optionally mirroring linked files and images.

# Page that is downloaded (once) and converted.
URL = 'http://ardour.org/book/export/html/5848'
# Local cache of URL; the download is skipped when this file already exists.
FILENAME = 'drupal-export.html'

# When false, pages are processed but nothing is written under OUTPUT_DIR.
WRITE = true
# When true, /files/ links are remapped via FILES_MAPPINGS and downloaded.
DOWNLOAD_FILES = false
# When true, images with a root-relative src are fetched from ardour.org.
GET_ARDOUR_ORG_IMAGES = false
# When true, images with an absolute http(s) src are fetched as well.
HANDLE_OTHER_IMAGES = false

OUTPUT_DIR = '_manual'
FILES_DIR = 'source'

# Link slugs that must be looked up under a different slug.
SLUG_MAPPINGS = {
  'working_with_sessions' => 'sessions',
  'export_stem' => 'export',
  'track_groups' => 'track_bus_groups',
  'vst_support' => 'windows_vst',
  'kbd_default' => 'default_bindings',
  'midistep_entry' => 'midi_step_entry',
  'midi_stepentry' => 'midi_step_entry'
}.freeze

# Link slugs that are never looked up; links to them become "/missing".
MISSING_SLUGS = %w[
  range_selection
  track_templates
  track_template
  color_dialog
  region_layering
  round_robin_inputs
  mcp_osx
  mcp_new_device
].freeze

# /files/ paths that are replaced by another path before downloading.
FILES_MAPPINGS = {
  '/files/a3_mnemonic_cheatsheet.pdf' => '/files/ardour-2.8.3-bindings-x.pdf',
  '/files/a3_mnemonic_cheat_sheet_osx.pdf' => '/files/ardour-2.8.3-bindings-osx-a4.pdf'
}.freeze

# In-memory cache of slug => node id (deliberately mutable, so not frozen).
LINK_SLUG_TO_NODE_ID = {}

# Resolves a /manual/<slug> link slug to a numeric node id.
#
# The result is cached in memory and in tmp/slug-to-node/<slug>; on a cache
# miss the manual page is fetched and the id is read from the "#content .node"
# element.
#
# @param slug [String] slug taken from a /manual/ link
# @return [Integer, nil] the node id, or nil for slugs in MISSING_SLUGS
def link_slug_to_node_id(slug)
  slug = SLUG_MAPPINGS[slug] || slug
  return nil if MISSING_SLUGS.include?(slug)

  LINK_SLUG_TO_NODE_ID[slug] ||= begin
    filename = "tmp/slug-to-node/#{slug}"
    if File.exist?(filename)
      File.read(filename).to_i
    else
      url = "http://ardour.org/manual/#{slug}"
      puts "opening #{url}"
      # URI.open rather than Kernel#open: the latter no longer handles URLs
      # on Ruby 3.0+. The block form closes the handle once parsing is done.
      page = URI.open(url) { |io| Nokogiri(io) }
      node_id = page.at('#content .node')['id'].sub(/^node-/, '').to_i
      File.write(filename, node_id.to_s)
      node_id
    end
  end
end

# Records the output path of a node in tmp/node-to-path/<node_id>.
# An existing record is left untouched, so the first registration wins.
#
# @param node_id [String, Integer]
# @param path [String] slash-separated slug path of the page
def register_node(node_id, path)
  filename = "tmp/node-to-path/#{node_id}"
  File.write(filename, path) unless File.exist?(filename)
end

# Looks up the path recorded by register_node.
#
# @param node_id [String, Integer]
# @return [String] the recorded path, or '' when the node has not been
#   registered (unknown nodes are tolerated rather than raising)
def node_id_to_path!(node_id)
  filename = "tmp/node-to-path/#{node_id}"
  return '' unless File.exist?(filename)

  File.read(filename)
end

# Fetches url and stores the response body at destination, creating parent
# directories as needed. Written in binary mode so PDFs and images are not
# altered by newline translation.
def download(url, destination)
  FileUtils.mkdir_p File.dirname(destination)
  File.binwrite(destination, URI.open(url, &:read))
end

# Rewrites the anchors of one page in place.
#
# /manual/<slug> links are pointed at the path of the matching node, or at
# "/missing" when the slug cannot be resolved. /files/ links are only touched
# when DOWNLOAD_FILES is set: they are remapped through FILES_MAPPINGS and
# the target is downloaded below FILES_DIR.
#
# @param content [Nokogiri::XML::Node] page content being rewritten
# @param page_path [Array<String>] slug path of the page (used for logging)
def rewrite_links(content, page_path)
  content.search('a').each do |a|
    case a['href']
    when %r{^/manual/(.*)}
      target_id = link_slug_to_node_id(Regexp.last_match(1))
      a['href'] = target_id ? "/#{node_id_to_path!(target_id)}" : '/missing'
    when %r{^(/files/.*)}
      next unless DOWNLOAD_FILES

      file_path = Regexp.last_match(1)
      mapped = FILES_MAPPINGS[file_path]
      if mapped
        file_path = mapped
        a['href'] = file_path
      end
      puts "downloading [#{file_path}] (for #{page_path.join('/')})"
      # file_path already starts with "/", so no extra separator is added.
      download("http://ardour.org#{file_path}", "#{FILES_DIR}#{file_path}")
    end
  end
end

# Processes the images of one page in place.
#
# Root-relative sources are fetched from ardour.org when
# GET_ARDOUR_ORG_IMAGES is set. Absolute http(s) sources are always rewritten
# to a root-relative path (scheme and host stripped) and fetched only when
# HANDLE_OTHER_IMAGES is set.
#
# @param content [Nokogiri::XML::Node] page content being rewritten
def localize_images(content)
  content.search('img').each do |img|
    src = img['src']
    case src
    when %r{^/}
      next unless GET_ARDOUR_ORG_IMAGES

      url = "http://ardour.org#{src}"
      puts "getting #{url}"
      download(url, "#{FILES_DIR}#{src}")
    when /^http/
      # Strip scheme and host for both http and https sources.
      new_src = '/' + src.sub(%r{^https?://[^/]+/}, '')
      img['src'] = new_src
      next unless HANDLE_OTHER_IMAGES

      puts "new_src: #{new_src}"
      puts "getting #{src}"
      download(src, "#{FILES_DIR}#{new_src}")
    end
  end
end

# Writes one page: YAML front matter, the page HTML and, for pages with
# sub-sections, a "{% children %}" tag.
#
# NOTE(review): the title is interpolated into the front matter unescaped;
# titles containing YAML-significant characters may need quoting - confirm.
def write_page(output_file, output_dir, title, content, has_children)
  FileUtils.mkdir_p output_dir if has_children
  # Make sure the file's own directory exists even for the very first page.
  FileUtils.mkdir_p File.dirname(output_file)

  File.open(output_file, 'w:UTF-8') do |f|
    f << <<-HTML
---
layout: default
title: #{title}
---

#{content.inner_html}

    HTML

    if has_children
      f << <<-HTML
{% children %}
      HTML
    end
  end
end

# Recursively converts every "div.section-<level>" below html into a page.
#
# For each section the title is turned into a slug, the node is registered
# under its slug path, nested sections and the duplicate headings are
# stripped from a copy of the section, links and images are rewritten, and
# (when WRITE is set) the page is written to
# OUTPUT_DIR/<NN_slug>/.../<NN_slug>.html. The section whose slug is
# "the-ardour3-manual" is treated as the root: it contributes no path segment
# and is written to OUTPUT_DIR/blah.html.
#
# @param html [Nokogiri::XML::Node] document or section to search
# @param level [Integer] section nesting level to look for
# @param path [Array<String>] slugs of the enclosing sections
# @param numbered_path [Array<String>] same as path, with "NN_" prefixes
def process(html, level = 1, path = [], numbered_path = [])
  html.search("div.section-#{level}").each_with_index do |child, i|
    title = child.at('h1.book-heading').inner_text
    node_id = child['id'].sub(/^node-/, '')
    slug = title.downcase.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
    root = slug == 'the-ardour3-manual'

    if root
      this_path = []
      this_numbered_path = []
    else
      this_path = path + [slug]
      this_numbered_path = numbered_path + [format('%02d_%s', i + 1, slug)]
    end

    register_node node_id, this_path.join('/')

    has_children = child.search("div.section-#{level + 1}").any?
    output_dir = "#{OUTPUT_DIR}/#{this_numbered_path.join('/')}"
    output_file = root ? "#{OUTPUT_DIR}/blah.html" : "#{output_dir}.html"

    # Work on a copy so the nested sections stay available for the recursion.
    content = child.dup
    content.search('h1.book-heading').remove
    content.search("div.section-#{level + 1}").remove
    heading = content.at('h2')
    heading.remove if heading && heading.inner_text == title

    rewrite_links(content, this_path)
    localize_images(content)
    write_page(output_file, output_dir, title, content, has_children) if WRITE

    process(child, level + 1, this_path, this_numbered_path)
  end
end

unless File.exist?(FILENAME)
  puts "downloading #{URL} to #{FILENAME}"
  download(URL, FILENAME)
end

FileUtils.mkdir_p('tmp/node-to-path')
FileUtils.mkdir_p('tmp/slug-to-node')

process Nokogiri(File.read(FILENAME))