textstyle_current = new TextStyle ; $this->textstyle_current->name = "T0" ; $this->textstyles['T0'] = $this->textstyle_current ; $this->tags = array () ; } function get_url ( $title ) { global $xmlg ; $url = "http://" . $xmlg["site_base_url"] . "/index.php?title=" . urlencode ( $title ) ; return $url ; } function get_footnote_id ( $name , &$text ) { $name = trim ( strtolower ( $name ) ) ; if ( $name != "" && isset ( $this->footnote_index[$name] ) ) { $this->footnote_counter++ ; if ( trim ( $text ) == "" ) $text = $this->footnote_text[$name] ; return $this->footnote_counter ; } else { $this->footnote_counter++ ; if ( $name != "" ) { $this->footnote_index[$name] = $this->footnote_counter ; $this->footnote_text[$name] = $text ; } return $this->footnote_counter ; } } function get_image_frames () { $ret = "" ; foreach ( $this->image_frames AS $f ) { $name = $f->name ; $align = $f->align ; $ret .= '' . '' ; } return $ret ; } function get_image_frame ( $align , $margin = false ) { $i = "fr" . $this->image_counter ; $o->name = $i ; $o->align = $align ; $o->left = $margin && $align == 'right' ? '0.1cm' : '0cm' ; $o->right = $margin && $align == 'left' ? '0.1cm' : '0cm' ; $o->top = '0cm' ; $o->bottom = $margin ? '0.1cm' : '0cm' ; $this->image_frames[$i] = $o ; return $i ; } function get_table_style ( &$tag ) { $this->table_counter++ ; $ret = "Table" . $this->table_counter ; $this->open_tables[] = $ret ; $o->name = $ret ; $o->cols = 0 ; $this->table_styles[$ret] = $o ; $this->col_counter[$ret] = 0 ; $this->row_counter[$ret] = 0 ; return $ret ; } function get_top_table_name () { $x = array_pop ( $this->open_tables ) ; $this->open_tables[] = $x ; return $x ; } function get_column_style () { $t = $this->get_top_table_name () ; $cn = $t . "." . chr ( 65 + $this->col_counter[$t] ) ; $cc = $cn . $this->row_counter[$t] ; $this->col_counter[$t]++ ; if ( !isset ( $this->col_styles[$cn] ) ) { $this->table_styles[$t]->cols = $this->col_counter[$t] ; $o->name = $cn ; $this->col_styles[$cn] = $o ; } return $cc ; } function reset_column () { $t = $this->get_top_table_name () ; $this->col_counter[$t] = 0 ; $this->row_counter[$t]++ ; } function get_table_styles () { $ret = "" ; # Tables foreach ( $this->table_styles AS $ts ) { $ret .= '' . '' . '' ; } # Columns foreach ( $this->col_styles AS $cs ) { $ret .= '' . '' . '' ; } return $ret ; } function ensure_list_open () { if ( $this->list_is_open ) return "" ; $this->list_is_open = true ; if ( substr ( $this->listcode , -1 ) == '#' ) $o->type = 'numbered' ; else $o->type = 'bullet' ; $o->depth = strlen ( $this->listcode ) ; $o->number = count ( $this->list_list ) + 1 ; $this->list_list[] = $o ; while ( count ( $this->list_item_name ) <= $o->depth ) $this->list_item_name[] = "" ; $this->list_item_name[$o->depth] = 'PL' . $o->number ; return '' ; } function ensure_list_closed () { if ( !$this->list_is_open ) return "" ; $this->list_is_open = false ; $ret = "" ; $ot = $this->tags ; do { $x = array_pop ( $this->tags ) ; $ret .= "" ; } while ( $x != "text:list-item" && count ( $this->tags ) > 0 ) ; if ( $x != "text:list-item" ) { $ret = "" ; $this->tags = $ot ; } $ret .= "" ; return $ret ; } function get_text_style ( $find ) { $found = "" ; foreach ( $this->textstyles AS $k => $ts ) { if ( $ts->bold != $find->bold ) continue ; if ( $ts->italics != $find->italics ) continue ; if ( $ts->underline != $find->underline ) continue ; $this->textstyles[$k]->count++ ; return $ts ; } # Create new style $found = "T" . count ( $this->textstyles ) ; $find->name = $found ; $find->count = 1 ; $this->textstyles[$found] = $find ; return $find ; } function get_styles_xml () { $ret = '' ; # Default styles $ret .= '' . '' . '' . '' . '' . '' ; # Text styles foreach ( $this->textstyles AS $ts ) { if ( $ts->count == 0 ) { $ret .= '' ; $ret .= '' ; } else { $ret .= '' ; $ret .= 'italics ) $ret .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"' ; if ( $ts->bold ) $ret .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"' ; if ( $ts->underline ) { $ret .= ' style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color"' ; } $ret .= '/>' ; } $ret .= '' ; } # List styles $cm = 0.3 ; foreach ( $this->list_list AS $list ) { $l = "List_20_" . $list->number ; $p = "PL" . $list->number ; $ret .= '' ; if ( $list->depth > 1 ) { $off = $cm * $list->depth ; $ret .= '' ; } $ret .= '' ; $ret .= '' ; $off = 0 ; for ( $depth = 1 ; $depth <= 10 ; $depth++ ) { $off += $cm ; if ( $list->type == 'numbered' ) { $ret .= '' . '' . '' ; } else { $ret .= '' . '' . '' . '' ; } } $ret .= '' ; } $ret .= $this->get_image_frames () ; $ret .= $this->get_table_styles () ; $ret .= '' ; return $ret ; } function get_odt_start () { $ret = "" ; $ret .= '' ; $ret .= ' ' ; $ret .= $this->get_styles_xml () ; return $ret ; } } class element { var $name = ''; var $attrs = array (); var $children = array (); # Temporary variables for link tags var $link_target = "" ; var $link_trail = "" ; var $link_parts = array () ; /** * Parse the children ... why won't anybody think of the children? */ function sub_parse(& $tree) { $ret = '' ; $temp = "" ; foreach ($this->children as $key => $child) { if (is_string($child)) { $temp .= $child ; } elseif ($child->name != 'ATTRS') { $ret .= $this->add_temp_text ( $temp ) ; $sub = $child->parse ( $tree , "" , $this ) ; if ( $this->name == 'LINK' ) { if ( $child->name == 'TARGET' ) $this->link_target = $sub ; else if ( $child->name == 'PART' ) $this->link_parts[] = $sub ; else if ( $child->name == 'TRAIL' ) $this->link_trail = $sub ; } $ret .= $sub ; } } return $ret . $this->add_temp_text ( $temp ) ; } function fix_text ( $s ) { /* $s = html_entity_decode ( $s ) ; filter_named_entities ( $s ) ; $s = str_replace ( "&" , "&" , $s ) ; $s = str_replace ( "<" , "<" , $s ) ; $s = str_replace ( ">" , ">" , $s ) ; return utf8_decode ( $s ) ;*/ filter_named_entities ( $s ) ; $s = str_replace ( "&" , "&" , $s ) ; $s = str_replace ( "<" , "<" , $s ) ; $s = str_replace ( ">" , ">" , $s ) ; return $s ; } function add_temp_text ( &$temp ) { $s = $temp ; $temp = "" ; return $this->fix_text ( $s ) ; } function push_tag ( $tag , $params = "" ) { global $xml2odt ; $n = "<" . $tag ; if ( $params != "" ) $n .= " " . $params ; $n .= ">" ; $xml2odt->tags[] = $tag ; return $n ; } function pop_tag () { global $xml2odt ; if ( count ( $xml2odt->tags ) == 0 ) return "" ; $x = array_pop ( $xml2odt->tags ) ; return "" ; } function top_tag () { global $xml2odt ; if ( count ( $xml2odt->tags ) == 0 ) return "" ; $x = array_pop ( $xml2odt->tags ) ; $xml2odt->tags[] = $x ; return $x ; } function handle_link ( &$tree ) { # http://www.google.de global $content_provider , $xml2odt , $xmlg ; # $ot = $tree->opentags ; $sub = $this->sub_parse ( $tree ) ; # $tree->opentags = $ot ; $link = "" ; if ( isset ( $this->attrs['TYPE'] ) AND strtolower ( $this->attrs['TYPE'] ) == 'external' ) { # External link $href = htmlentities ( $this->attrs['HREF'] ) ; if ( trim ( $sub ) == "" ) { $sub = $href ; $sub = explode ( '://' , $sub , 2 ) ; $sub = explode ( '/' , array_pop ( $sub ) , 2 ) ; $sub = array_shift ( $sub ) ; } $sub = $this->fix_text ( $sub ) ; $link = '' . $sub . '' ; } else { # Internal link $link = "LINK" ; if ( count ( $this->link_parts ) > 0 ) { $link = array_pop ( $this->link_parts ) ; array_push ( $this->link_parts , $link ) ; # Compensating array_pop } $link_text = $link ; if ( $link == "" ) $link = $this->link_target ; $link .= $this->link_trail ; $ns = $content_provider->get_namespace_id ( $this->link_target ) ; if ( $ns == 6 ) { # Image $nstext = explode ( ":" , $this->link_target , 2 ) ; $target = array_pop ( $nstext ) ; $nstext = array_shift ( $nstext ) ; $text = array_pop ( $this->link_parts ) . $this->link_trail ; $href = $content_provider->get_image_url ( $target ) ; $xml2odt->image_counter++ ; $image_file = $content_provider->copyimagefromwiki ( $target , $href ) ; $image_file_full = $xmlg['image_destination'] . "/" . $image_file ; $image_file = "Pictures/" . $image_file ; # Dimensions list($i_width, $i_height, $i_type, $i_attr) = @getimagesize($image_file_full); if ( $i_width <= 0 ) { # Paranoia $i_width = 100 ; $i_height = 100 ; } $is_thumb = false ; $align = '' ; $width = '' ; foreach ( $this->link_parts AS $s ) { $s = trim ( $s ) ; if ( $s == 'thumb' ) { $is_thumb = true ; if ( $align == '' ) $align = 'right' ; if ( $width == '' ) $width = '400' ; } else if ( substr ( trim ( strtolower ( $s ) ) , -2 ) == 'px' ) { $s = trim ( strtolower ( $s ) ) ; $s = trim ( substr ( $s , 0 , strlen ( $s ) - 2 ) ) ; $width = $s * 2 ; } } if ( $width == '' ) $width = $i_width ; if ( $align == '' ) $align = 'left' ; $page_width = 1000 ; # Arbitary: page width = 1000 px if ( $width > $page_width ) $width = $page_width ; $width = $width / 100 ; $height = ( $i_height * $width ) / $i_width ; $width .= "cm" ; $height .= "cm" ; $link = "" ; $fr = $xml2odt->get_image_frame ( $align ) ; $image_counter = $xml2odt->image_counter ; if ( $is_thumb && $text != "" ) { $ofr = $xml2odt->get_image_frame ( $align , true ) ; $link .= '' ; $link .= '' ; $link .= '' ; } $link .= '' . '' . '' ; if ( $is_thumb && $text != "" ) { $link .= $text ; $link .= '' ; } } else if ( $ns == -9 ) { # Interlanguage link $sub = $this->link_target ; $nstext = explode ( ":" , $sub , 2 ) ; $name = array_pop ( $nstext ) ; $nstext = array_shift ( $nstext ) ; $sub = utf8_encode ( $sub ) ; $href = "http://{$nstext}.wikipedia.org/wiki/" . urlencode ( $name ) ; $link = '' . $sub . '' ; if ( !$xmlg['keep_interlanguage'] ) $link = "" ; # No interlanguage links? } else if ( $ns == -8 ) { # Category link if ( $link_text == "!" || $link_text == '*' ) $link = "" ; else if ( $link_text != $this->link_target ) $link = " ({$link_text})" ; else $link = "" ; $link = "" . $this->link_target . $link . "" ; if ( !$xmlg['keep_categories'] ) $link = "" ; # No category links? } else { if ( $content_provider->is_an_article ( $this->link_target ) ) { $link = "SEITEN-INTERNER LINK" ; # dub sez... working internal links $lt = ( trim ( $this->link_target ) ) ; $lt = str_replace ( "+" , " " , $lt ) ; $text = array_pop ( $this->link_parts ) ; if (!$text) $text = $lt; $link = '' . $text . '' ; #$link = "#{$lt}|outline" ; } else { $href = $xml2odt->get_url ( $this->link_target ) ; if ( count ( $this->link_parts ) == 0 ) $text = $this->link_target ; else $text = array_pop ( $this->link_parts ) ; $text .= $this->link_trail ; $link = '' . $text . '' ; } } } return $link ; } function handle_extensions ( &$tree ) { global $content_provider , $xml2odt , $xmlg ; $ret = "" ; $name = strtolower ( $this->attrs['EXTENSION_NAME'] ) ; $sub = $this->sub_parse ( $tree ) ; if ( $name == "ref" ) { if ( isset ( $this->attrs['NAME'] ) ) $fname = $this->attrs['NAME'] ; else $fname = "" ; $note_class = strtolower ( trim ( $xmlg["odt_footnote"] ) ) ; $note_style = ucfirst ( $note_class ) ; $id = $xml2odt->get_footnote_id ( $fname , $sub ) ; $ret .= '' . $id . '' ; $ret .= '' . $sub . '' ; $ret .= '' ; } else { # Unhandeled extension $ret = $sub ; } return $ret ; } function parse ( &$tree ) { global $xml2odt ; $ret = ''; $tag = $this->name; # Shortcut $old_text_style = $xml2odt->textstyle_current ; $tag_count = count ( $xml2odt->tags ) ; # Open tag if ( $tag == "SPACE" ) { return '' ; } else if ( $tag == "ARTICLE" ) { if ( $xml2odt->article_counter > 0 ) { $ret .= '' ; } $xml2odt->article_counter++ ; if ( isset ( $this->attrs['TITLE'] ) ) { $title = $this->attrs['TITLE'] ; $ret .= '' ; $ret .= urldecode ( $title ) ; $ret .= '' ; } } else if ( $tag == "TEMPLATE" ) { return "" ; } else if ( $tag == "TEMPLATEVAR" ) { return "" ; } else if ( $tag == "MAGIC_VARIABLE" ) { return "" ; } else if ( $tag == "HR" ) { return '' ; } else if ( $tag == "EXTENSION" ) { return $this->handle_extensions ( $tree ) ; } else if ( $tag == "HEADING" || substr ( $tag , 0 , 7 ) == "XHTML:H" ) { if ( $tag == "HEADING" ) $level = $this->attrs['LEVEL'] ; else $level = substr ( $tag , 7 , 1 ) ; $ret .= $this->push_tag ( "text:h" , 'text:style-name="Heading_20_' . $level . '" text:outline-level="' . $level . '"' ) ; } else if ( $tag == "BOLD" || $tag == "XHTML:B" || $tag == "XHTML:STRONG" ) { $xml2odt->textstyle_current->bold = true ; $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ; $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ; } else if ( $tag == "XHTML:U" ) { $xml2odt->textstyle_current->underline = true ; $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ; $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ; } else if ( $tag == "ITALICS" || $tag == "XHTML:I" || $tag == "XHTML:EM" ) { $xml2odt->textstyle_current->italics = true ; $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ; $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ; } else if ( $tag == "PARAGRAPH" || $tag == "XHTML:P" ) { if ( $this->top_tag() != "text:p" ) $ret .= $this->push_tag ( "text:p" , 'text:style-name="T0"' ) ; } else if ( $tag == "LIST" || $tag == "XHTML:OL" || $tag == "XHTML:UL" ) { $is_list = true ; $ret .= $xml2odt->ensure_list_closed () ; if ( $this->top_tag() == "text:p" ) { $reopen_p = true ; $ret .= $this->pop_tag () ; } if ( $tag == "LIST" ) $type = strtolower ( $this->attrs['TYPE'] ) ; else $type = "" ; if ( $type == 'numbered' || $tag == 'XHTML:OL' ) $xml2odt->listcode .= "#" ; if ( $type == 'ident' ) $xml2odt->listcode .= " " ; else $xml2odt->listcode .= "*" ; } else if ( $tag == "LINK" ) { return $this->handle_link ( $tree ) ; } else if ( $tag == "LISTITEM" || $tag == "XHTML:LI" ) { $ret .= $xml2odt->ensure_list_open () ; $tag_count = count ( $xml2odt->tags ) ; $p = $xml2odt->list_item_name[strlen($xml2odt->listcode)] ; $ret .= $this->push_tag ( "text:list-item" ) ; $ret .= $this->push_tag ( "text:p" , 'text:style-name="' . $p . '"' ) ; } else if ( $tag == "TABLE" ) { if ( $this->top_tag() == "text:p" ) { $reopen_p = true ; $ret .= $this->pop_tag () ; } $name = $xml2odt->get_table_style ( $this ) ; $ret .= $this->push_tag ( "table:table" , 'table:style-name="' . $name . '"' ) ; $other_ret = $ret ; $ret = "" ; } else if ( $tag == "TABLEROW" ) { $xml2odt->reset_column () ; $ret .= $this->push_tag ( "table:table-row" ) ; } else if ( $tag == "TABLECELL" || $tag == "TABLEHEAD" ) { $name = $xml2odt->get_column_style () ; $ret .= $this->push_tag ( "table:table-cell" , 'table:style_name="' . $name . '" office:value-type="string"' ) ; if ( $tag == "TABLEHEAD" ) $name = "Table_20_Heading" ; else $name = "Table_20_Contents" ; $ret .= $this->push_tag ( "text:p" , 'text:style-name="' . $name . '"' ) ; } else if ( $tag == "TABLECAPTION" ) { return "" ; # Skipping caption } # Children $ret .= $this->sub_parse ( $tree ) ; # Close tag $xml2odt->textstyle_current = $old_text_style ; while ( $tag_count < count ( $xml2odt->tags ) ) { $x = array_pop ( $xml2odt->tags ) ; $ret .= "" ; } if ( isset ( $is_list ) ) { $ret .= $xml2odt->ensure_list_closed () ; $xml2odt->listcode = substr ( $xml2odt->listcode , 0 , strlen ( $xml2odt->listcode ) - 1 ) ; } if ( $tag == "TABLE" ) { $t = $xml2odt->get_top_table_name () ; for ( $a = 0 ; $a < $xml2odt->table_styles[$t]->cols ; $a++ ) { $name = $t . "." . chr ( 65 + $a ) ; $other_ret .= '' ; } $ret = $other_ret . $ret ; array_pop ( $xml2odt->open_tables ) ; } if ( isset ( $reopen_p ) ) { $ret .= $this->push_tag ( "text:p" , 'text:style-name="T0"' ) ; } return $ret ; } } require_once ( "xml2tree.php" ) ; # Uses the "element" class defined above ?>