<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xml:base="http://NadeauSoftware.com" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
 <title>NadeauSoftware.com articles from September, 2007</title>
 <link>http://NadeauSoftware.com/articles/2007/09</link>
 <description>A list of articles, sorted by title.</description>
 <language>en</language>
<item>
 <title>PHP tip: How to strip symbol characters from a web page</title>
 <link>http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_symbol_characters_web_page</link>
 <description>&lt;p class=&quot;summary&quot;&gt;Most symbol characters, like + = &amp;copy; &amp;trade; &amp;larr; &amp;rarr; &amp;#x263a; &amp;#x2663; &amp;#x2660;, need to be stripped out of web page text before processing it in a search engine or text analysis tool. For international text, there are thousands of symbol characters, and some should be removed in one context, but not in another. This tip shows how.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_symbol_characters_web_page&quot;&gt;read more&lt;/a&gt;&lt;/p&gt;</description>
 <comments>http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_symbol_characters_web_page#comments</comments>
 <category domain="http://NadeauSoftware.com/articles/php">PHP</category>
 <category domain="http://NadeauSoftware.com/articles/text_processing">Text processing</category>
 <pubDate>Sat, 29 Sep 2007 17:41:49 -0700</pubDate>
 <dc:creator>Dave_Nadeau</dc:creator>
 <guid isPermaLink="false">62 at http://NadeauSoftware.com</guid>
</item>
<item>
 <title>PHP tip: How to strip punctuation characters from a web page</title>
 <link>http://NadeauSoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page</link>
 <description>&lt;p class=&quot;summary&quot;&gt;When processing text for a search engine or analysis tool, code needs to strip out punctuation, formatting, spacing, and control characters to reveal indexable text. In international text there are hundreds of these characters, and some should be removed in one context, but not in another. This tip shows how.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://NadeauSoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page&quot;&gt;read more&lt;/a&gt;&lt;/p&gt;</description>
 <comments>http://NadeauSoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#comments</comments>
 <category domain="http://NadeauSoftware.com/articles/php">PHP</category>
 <category domain="http://NadeauSoftware.com/articles/text_processing">Text processing</category>
 <pubDate>Sat, 15 Sep 2007 17:39:17 -0700</pubDate>
 <dc:creator>Dave_Nadeau</dc:creator>
 <guid isPermaLink="false">61 at http://NadeauSoftware.com</guid>
</item>
<item>
 <title>PHP tip: How to strip HTML tags, scripts, and styles from a web page</title>
 <link>http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_html_tags_web_page</link>
 <description>&lt;p class=&quot;summary&quot;&gt;The HTML tags on a web page must be stripped away to get clean text for a PHP search engine, keyword extractor, or some other page analysis tool. PHP&#039;s standard &lt;code&gt;strip_tags(&amp;nbsp;)&lt;/code&gt; function will do part of the job, but you need to strip out styles, scripts, embedded objects, and other unwanted page code first.  This tip shows how.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_html_tags_web_page&quot;&gt;read more&lt;/a&gt;&lt;/p&gt;</description>
 <comments>http://NadeauSoftware.com/articles/2007/09/php_tip_how_strip_html_tags_web_page#comments</comments>
 <category domain="http://NadeauSoftware.com/articles/php">PHP</category>
 <category domain="http://NadeauSoftware.com/articles/text_processing">Text processing</category>
 <pubDate>Sat, 01 Sep 2007 20:04:39 -0700</pubDate>
 <dc:creator>Dave_Nadeau</dc:creator>
 <guid isPermaLink="false">60 at http://NadeauSoftware.com</guid>
</item>
</channel>
</rss>
