DocBook PDF" ; } if ( isset ( $xmlg['docbook']['command_html'] ) ) { $optional[] = "DocBook HTML" ; } if ( isset ( $xmlg['zip_odt'] ) ) { $optional[] = "OpenOffice XML" ; $optional[] = "OpenOffice ODT" . "References as endnotes (instead of footnotes)" ; } $optional = "
" . implode ( "
" , $optional ) ; # dub sez... working images if ( $as_extension ) $site = "" ; else $site = "Site : http:///index.php
" ; $additional = array() ; if ( $xmlg['allow_get'] ) { $additional[] = "This page can be called with parameters: w2x.php?doit=1&whatsthis=articlelist&site=en.wikipedia.org/w&output_format=odt&text=Biochemistry" ; $additional[] = "For additional parameters, see here" ; } # Plain text translation options $a = array ( 'en' => 'English', 'de' => 'German', 'fr' => 'French', 'es' => 'Spanish', 'it' => 'Italian', ) ; asort ( $a ) ; $tttlo = "" ; foreach ( $a AS $b => $c ) { $tttlo .= "" ; } $additional = "

" . implode ( "
" , $additional ) . "

" ; return "

Known issues:

In templates, {{{variables}}} used within <nowiki> tags will be replaced as well (too lazy to strip them)
HTML comments are removed (instead of converted into XML tags)

{$additional}

" ; }

# Fetch one request parameter.
#   $s       - parameter name
#   $default - value returned when the parameter was not submitted
# Reads from $_REQUEST when $xmlg['allow_get'] is enabled, otherwise from
# $_POST only. A missing 'allow_get' key is treated as "disabled".
function get_param ( $s , $default = NULL ) {
    global $xmlg ;
    if ( !empty ( $xmlg['allow_get'] ) ) {
        return isset ( $_REQUEST[$s] ) ? $_REQUEST[$s] : $default ;
    }
    return isset ( $_POST[$s] ) ? $_POST[$s] : $default ;
}

# Add one article to the stack of to-be-converted articles.
# The title is trimmed, blank titles are ignored, and '_' is converted to ' '.
function push_article ( &$aArticles, $article ) {
    $title = trim ( $article ) ;
    if ( $title != "" ) {
        # Plain character substitution; no regular expression needed
        $aArticles[] = str_replace ( '_' , ' ' , $title ) ;
    }
}

# Append XML, or links to XML temporary files.
# With $xmlg["use_xml_temp_files"] set, $xml becomes an array of temp file
# names (one file per call); otherwise $new_xml is concatenated onto $xml.
function append_to_xml ( &$xml , $new_xml ) {
    global $xmlg ;
    if ( empty ( $xmlg["use_xml_temp_files"] ) ) {
        # Do not use temp files
        $xml .= $new_xml ;
        return ;
    }

    # Use temp files
    if ( !is_array ( $xml ) ) $xml = array () ;
    # Retries until a temp file could be opened for writing; there is no retry limit
    do {
        $tmp_file_name = tempnam ( $xmlg["temp_dir"] , "XMLTMP" ) ;
        $tmp_file = fopen($tmp_file_name, 'wb') ;
    } while ( $tmp_file === false ) ;
    fwrite ( $tmp_file , $new_xml ) ;
    fclose ( $tmp_file ) ;
    $xml[] = $tmp_file_name ;
}

# Returns the next article XML, or false when nothing is left.
# In temp-file mode the file is read and deleted; in string mode the whole
# string is handed out once and $xml is reset to an empty array.
function xml_shift ( &$xml ) {
    if ( !is_array ( $xml ) ) {
        # Do not use temp files
        if ( $xml != '' ) {
            $x = $xml ;
            $xml = array () ;
            return $x ;
        }
        return false ;
    }

    # Use temp files
    if ( count ( $xml ) == 0 ) return false ;
    $x = array_shift ( $xml ) ;
    $ret = file_get_contents ( $x ) ;
    unlink ( $x ) ;
    return $ret ;
}

# Free temporary XML files, if any
# Should not be necessary if xml_shift was used
function xml_cleanup ( &$xml ) {
    global $xmlg ;
    if ( empty ( $xmlg["use_xml_temp_files"] ) ) return ; # not using temp files
    if ( !is_array ( $xml ) ) return false ;
    foreach ( $xml AS $x ) {
        unlink ( $x ) ;
    }
    $xml = array () ;
}

# Make a document title safe for use inside a quoted Content-Disposition
# filename: removes double quotes, backslashes and ASCII control characters.
function w2x_header_filename ( $name ) {
    return preg_replace ( '/[\x00-\x1F\x7F"\\\\]/' , '' , (string) $name ) ;
}

## MAIN PROGRAM

if ( get_param('doit',false) ) { # Process
    # Default of '' keeps stripslashes() from being handed NULL when no text was submitted
    $wikitext = stripslashes ( (string) get_param('text','') ) ;

    if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
        $content_provider = new ContentProviderHTTP ;
    } else { # MediaWiki extension
        $content_provider = new ContentProviderMySQL ;
    }

    $converter = new MediaWikiConverter ;

    $xmlg["useapi"] = isset ( $_REQUEST['useapi'] ) ;
    $xmlg["book_title"] = get_param('document_title');
    $xmlg["site_base_url"] = get_param('site') ;
    $xmlg["resolvetemplates"] = get_param('use_templates','all') ;
    $xmlg['templates'] = explode ( "\n" , get_param('templates','') ) ;
    $xmlg['add_gfdl'] = get_param('add_gfdl',false) ;
    $xmlg['keep_interlanguage'] = get_param('keep_interlanguage',false) ;
    $xmlg['keep_categories'] = get_param('keep_categories',false) ;

    # the article list
    $aArticles = array () ;

    $t = microtime_float() ;
    $xml = "" ;
    $format = get_param('output_format') ;
    $whatsthis = get_param('whatsthis') ;

    # Catch listpagename: the first input line names a wiki page whose list
    # items (lines starting with *, # or :) supply the article titles
    if ( $whatsthis == "listpagename" ) {
        # explode() results go through a variable first: array_shift() and
        # friends take their argument by reference
        $input_lines = explode ( "\n" , $wikitext ) ;
        $listpage = trim ( $input_lines[0] ) ;
        $wikitext = $content_provider->get_wiki_text ( $listpage ) ;
        $lines = explode ( "\n" , $wikitext ) ;
        $wikitext = array () ;
        foreach ( $lines AS $line ) {
            $first_char = substr ( $line , 0 , 1 ) ;
            if ( $first_char != '*' && $first_char != '#' && $first_char != ':' ) continue ;
            # Keep what stands between the first '[[' and the following ']]'
            $link_parts = explode ( '[[' , $line , 2 ) ;
            $link_parts = explode ( ']]' , array_pop ( $link_parts ) , 2 ) ;
            $target = trim ( $link_parts[0] ) ;
            if ( $target == '' ) continue ;
            $wikitext[] = $target ;
        }
        $wikitext = implode ( "\n" , $wikitext ) ;
        $whatsthis = 'articlelist' ;
    }

    # QUICK HACK! NEEDS TO WORK!
    if ( $format == "odt" || $format == "odt_xml" || $format == "docbook_pdf" || $format == "docbook_html" || $format == "docbook_xml" ) {
        $xmlg["allow_xml_temp_files"] = false ;
    }

    if ( $whatsthis == "wikitext" ) {
        $content_provider->first_title = "Raw wikitext page" ;
        $wiki2xml_authors = array () ;
        $xml = $converter->article2xml ( "" , $wikitext , $xmlg ) ;
    } else {
        if ( !empty ( $xmlg['allow_xml_temp_files'] ) ) $xmlg['use_xml_temp_files'] = true ;
        foreach ( explode ( "\n" , $wikitext ) AS $a ) {
            push_article( $aArticles, $a );
        }

        # set the first article name as the default title
        # (empty string when the submitted list contained no usable title)
        if ($xmlg["book_title"] == '') {
            $xmlg["book_title"] = isset ( $aArticles[0] ) ? $aArticles[0] : '' ;
        }

        # as long as we have articles to convert (this might change in between!)
        # Compared against null (array_shift's "empty" result) so that an
        # article titled "0" does not end the loop early
        while ( ( $a = array_shift( $aArticles ) ) !== null ) {
            $wiki2xml_authors = array () ;

            # Article page|Article name
            $a = explode ( '|' , $a ) ;
            if ( count ( $a ) == 1 ) $a[] = $a[0] ;
            $title_page = trim ( array_shift ( $a ) ) ;
            $title_name = trim ( array_pop ( $a ) ) ;

            $wikitext = $content_provider->get_wiki_text ( $title_page ) ;
            add_authors ( $content_provider->authors ) ;
            append_to_xml ( $xml , $converter->article2xml ( $title_name , $wikitext , $xmlg, $aArticles ) ) ;
        }
    }

    # Timing: $tt = total, $lt = content loading, $t = everything else
    $t = microtime_float() - $t ;
    $tt = round( $t, 3 ) ;
    $lt = round( $content_provider->load_time, 3 ) ;
    $t = round( $t - $lt, 3) ;

    # NOTE(review): this header string, the two fixed prints in the "xml"
    # branch and the replacement string in the "text" branch are empty or
    # bare-newline literals in this copy. They are reproduced unchanged;
    # confirm against the upstream file whether markup is missing here.
    $xmlg['xml_articles_header'] = "" ;

    # Output format
    if ( $format == "xml" ) {
        header('Content-type: text/xml; charset=utf-8');
        print "\n" ;
        print xml_articles_header() ;
        # Strict test: an empty or "0" chunk must not cut the output short
        while ( ( $x = xml_shift ( $xml ) ) !== false ) print $x ;
        print "" ;

    } else if ( $format == "text" ) {
        $xmlg['plaintext_markup'] = get_param('plaintext_markup',false) ;
        $xmlg['plaintext_prelink'] = get_param('plaintext_prelink',false) ;
        $out = $converter->articles2text ( $xml , $xmlg ) ;
        $out = str_replace ( "\n" , "
" , $out ) ;
        header('Content-type: text/html; charset=utf-8');
        print $out ;

    } else if ( $format == "translated_text" ) {
        $xmlg['plaintext_markup'] = false ;
        $xmlg['plaintext_prelink'] = false ;
        $out = $converter->articles2text ( $xml , $xmlg ) ;
        # (a disabled variant printed $out directly as HTML at this point)

        # Drop the first line of the plain text before sending it off
        $out = explode ( "\n" , $out ) ;
        array_shift ( $out ) ;
        $out = trim ( implode ( "\n" , $out ) ) ;

        # Source language = first dot-separated part of the site URL
        $site_parts = explode ( '.' , (string) $xmlg["site_base_url"] ) ;
        $source_language = $site_parts[0] ;
        $target_language = get_param ( 'translated_text_target_language' , 'en' ) ;
        $langpair = urlencode ( "{$source_language}|{$target_language}" ) ;
        $url = "http://www.google.com/translate_t?langpair={$langpair}&text=" . urlencode ( utf8_decode ( $out ) ) ;
        echo file_get_contents ( $url ) ;

    } else if ( $format == "xhtml" ) {
        $xmlg['xhtml_justify'] = get_param ( 'xhtml_justify' , false ) ;
        $xmlg['xhtml_logical_markup'] = get_param ( 'xhtml_logical_markup' , false ) ;
        $xmlg['xhtml_source'] = get_param ( 'xhtml_source' , false ) ;
        if ( $xmlg['xhtml_source'] ) {
            # Show the XHTML source, one tag per line
            header('Content-type: text/xml; charset=utf-8');
            $s = $converter->articles2xhtml ( $xml , $xmlg ) ;
            $s = str_replace ( '>' , ">\n" , $s ) ;
            $s = str_replace ( '<' , "\n<" , $s ) ;
            $s = str_replace ( "\n\n" , "\n" , $s ) ;
            echo trim ( $s ) ;
        } else {
            # Header hack for IE
            # isset(): clients are not required to send an Accept header
            if ( isset ( $_SERVER["HTTP_ACCEPT"] ) && stristr($_SERVER["HTTP_ACCEPT"],"application/xhtml+xml") ) {
                header("Content-type: application/xhtml+xml");
            } else {
                header("Content-type: text/html");
            }
            echo $converter->articles2xhtml ( $xml , $xmlg ) ;
        }

    } else if ( $format == "odt" || $format == "odt_xml" ) {
        if ( isset ( $_REQUEST['odt_footnote'] ) ) $xmlg["odt_footnote"] = 'endnote' ;
        if ( $xmlg['sourcedir'] == '.' ) $cwd = getcwd() ;
        else $cwd = $xmlg['sourcedir'] ;
        $template_file = $cwd . '/template.odt' ;

        # tempnam() creates a placeholder file; the working directory name is derived from it
        $dir_file = tempnam($xmlg["temp_dir"], "ODD");
        $dir = $dir_file . "-DIR" ;
        $xmlg['image_destination'] = $dir . "/Pictures" ;

        $zipdir = $cwd ;
        if ( isset ( $xmlg["zip_odt_path"] ) ) { # Windows strange bug workaround
            $zipdir = $xmlg["zip_odt_path"] ;
        }
        chdir ( $zipdir ) ;

        # Unzip template
        $cmd = $xmlg['unzip_odt'] ;
        $cmd = str_replace ( '$1' , escapeshellarg ( $template_file ) , $cmd ) ;
        $cmd = str_replace ( '$2' , escapeshellarg ( $dir ) , $cmd ) ;
        exec ( $cmd ) ;

        # Convert XML to ODT
        chdir ( $cwd ) ;
        if ( $format == "odt_xml" ) $content_provider->block_file_download = true ;
        $out = $converter->articles2odt ( $xml , $xmlg ) ;
        chdir ( $zipdir ) ;

        # Create ODT structure
        $handle = fopen ( $dir . "/content.xml" , "w" ) ;
        if ($handle) {
            fwrite ( $handle , $out ) ;
            fclose ( $handle ) ;

            # Generate temporary ODT file
            $out_file = tempnam('', "ODT");
            $cmd = $xmlg['zip_odt'] ;
            $cmd = str_replace ( '$1' , escapeshellarg ( $out_file ) , $cmd ) ;
            if ( !empty ( $xmlg['is_windows'] ) ) {
                $cmd = str_replace ( '$2' , escapeshellarg ( $dir . "/" ) , $cmd ) ;
            } else {
                $cmd = str_replace ( '$2' , escapeshellarg ( './' ) , $cmd ) ;
                # linux/unix zip needs to be in the directory, otherwise it will
                # include needless parts into the directory structure
                chdir ($dir);
            }

            # remove the output if it for some reason already exists
            @unlink ( $out_file ) ;
            exec ( $cmd ) ;

            if ( $format == "odt" ) { # Return ODT file
                $filename = $xmlg["book_title"] ;
                if (!preg_match('/\.[a-zA-Z]{3}$/',$filename)) {
                    $filename .= '.odt';
                }
                if (!preg_match('/\.[a-zA-Z]{3}$/',$out_file)) {
                    $out_file .= '.zip';
                }
                header('Content-type: application/vnd.oasis.opendocument.text; charset=utf-8');
                # The title comes from the request, so it is cleaned before being quoted
                header('Content-Disposition: inline; filename="'.w2x_header_filename ( $filename ).'"');
                # Only stream when the archive could be opened, so that the
                # cleanup below still runs if the zip step failed
                $handle = fopen($out_file, 'rb');
                if ( $handle !== false ) {
                    fpassthru ( $handle ) ;
                    fclose ( $handle ) ;
                }
            } else { # Return XML
                header('Content-type: text/xml; charset=utf-8');
                print str_replace ( ">" , ">\n" , $out ) ;
            }

            # Cleanup
            SureRemoveDir ( $dir ) ;
            @rmdir ( $dir ) ;
            @unlink ( $dir_file ) ;
            @unlink ( $out_file ) ;
            chdir ( $cwd ) ;
        } else {
            # error occured: content.xml could not be created.
            # Remove the placeholder temp file so it is not left behind.
            @unlink ( $dir_file ) ;
        }

    } else if ( $format == "docbook_xml" ) {
        $out = $converter->articles2docbook_xml ( $xml , $xmlg ) ;
        header('Content-type: text/xml; charset=utf-8');
        print $out ;

    } else if ( $format == "docbook_pdf" || $format == "docbook_html" ) {
        # "docbook_pdf" -> "pdf", "docbook_html" -> "html"
        $filetype = substr ( $format , 8 ) ;
        $filename = $converter->articles2docbook_pdf ( $xml , $xmlg , strtoupper ( $filetype ) ) ;

        if ( file_exists ( $filename ) ) {
            $fp = fopen($filename, 'rb');
            if ( $format == "docbook_pdf" ) {
                header('Content-Type: application/pdf');
                header("Content-Length: " . (string) filesize($filename));
                header('Content-Disposition: attachment; filename="'.w2x_header_filename ( $xmlg["book_title"] ).'.pdf"');
            } else if ( $format == "docbook_html" ) {
                header('Content-Type: text/html');
                header("Content-Length: " . (string) filesize($filename));
                header('Content-Disposition: inline; filename="'.w2x_header_filename ( $xmlg["book_title"] ).'.html"');
            }
            # Only stream when the file could be opened, so cleanup still runs
            if ( $fp !== false ) {
                fpassthru($fp);
                fclose ( $fp ) ;
            }
        }

        # Cleanup
        $pdf_dir = dirname ( dirname ( $filename ) ) ;
        SureRemoveDir ( $pdf_dir ) ;
        @rmdir ( $pdf_dir ) ;
    }

    xml_cleanup ( $xml ) ;
    exit ;

} else { # Show the form
    if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
        header('Content-type: text/html; charset=utf-8');
        print "

Magnus' magic MediaWiki-to-XML-to-stuff converter

All written in PHP - so portable, ~~so incredibly slow...~~ about as fast as the original MediaWiki parser! (For the source, see here, trunk 'wiki2xml', directory 'php')

" ;
        $xmlg["useapi"] = 1 ;
        print get_form () ;
        print "" ;
    } else { # MediaWiki extension
        $out = get_form ( true ) ;
    }
}

#Automatically resolve templates
?>

Paste article list or wikitext here

Magnus' magic MediaWiki-to-XML-to-stuff converter