// 12. Extract uppercase words from a file, extract unique words
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.IO;

namespace lrn2CSharp12
{
    /// <summary>
    /// Console exercise: asks for a file name, then prints every word that starts with an
    /// uppercase letter followed by every distinct word in order of first appearance.
    /// </summary>
    class Program
    {
        // Words are separated by single spaces, exactly as in the original exercise.
        private static readonly char[] WordSeparators = { ' ' };

        /// <summary>
        /// Program entry point. Prompts for a file name on the console, scans the file and
        /// prints the collected word lists, then waits for the Escape key.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string file = "";
            List<string> upperWords = new List<string>();
            List<string> uniqueWords = new List<string>();

            Console.WriteLine("Input name of a file to read and count words.");
            Console.Write(":");

            try
            {
                file = Console.ReadLine();

                // 'using' guarantees both handles are released even when reading fails.
                using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fs))
                {
                    CollectWords(sr, upperWords, uniqueWords);
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of a console tool: report the cause and carry on so that
                // whatever was collected before the failure is still printed below.
                Console.WriteLine("Could not read \"{0}\": {1}", file, ex.Message);
            }

            PrintWords("Upper Words:", upperWords);
            PrintWords("Unique Words:", uniqueWords);

            Console.WriteLine("Press esc to exit.");
            while (!keyPressHandler(Console.ReadKey(true)))
            {
                // Console.ReadKey blocks until a key is pressed, so no sleep is needed here.
            }
        }

        /// <summary>
        /// Reads <paramref name="reader"/> line by line and sorts its words into the two lists.
        /// </summary>
        /// <param name="reader">Source of the text to scan.</param>
        /// <param name="upperWords">Receives every word whose first character is uppercase (duplicates kept).</param>
        /// <param name="uniqueWords">Receives each distinct word once, in order of first appearance.</param>
        private static void CollectWords(TextReader reader, List<string> upperWords, List<string> uniqueWords)
        {
            // Hash set gives O(1) membership checks; the list preserves first-seen order.
            HashSet<string> seen = new HashSet<string>(uniqueWords);

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // RemoveEmptyEntries drops the empty tokens produced by leading, trailing or
                // repeated spaces; indexing word[0] on such a token used to throw.
                string[] words = line.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);
                foreach (string word in words)
                {
                    if (char.IsUpper(word[0]))
                    {
                        upperWords.Add(word);
                    }

                    if (seen.Add(word))
                    {
                        uniqueWords.Add(word);
                    }
                }
            }
        }

        /// <summary>
        /// Writes a heading, one word per line, and a trailing blank line to the console.
        /// </summary>
        /// <param name="heading">Heading line printed before the words.</param>
        /// <param name="words">Words to print.</param>
        private static void PrintWords(string heading, IEnumerable<string> words)
        {
            Console.WriteLine(heading);
            foreach (string word in words)
            {
                Console.WriteLine(word);
            }
            Console.WriteLine();
        }

        /// <summary>
        /// Tells whether the pressed key should end the program.
        /// </summary>
        /// <param name="input">Key information returned by <c>Console.ReadKey</c>.</param>
        /// <returns><c>true</c> when the key is Escape; otherwise <c>false</c>.</returns>
        private static Boolean keyPressHandler(ConsoleKeyInfo input)
        {
            return input.Key == ConsoleKey.Escape;
        }
    }
}

posted by dharh 1:08 AM May 30th, 2011

11. Input is an HTML table; remove all tags and put the data in a comma/tab-separated file.

Part of this was an exercise in looking up what others have already done.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Data;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace lrn2CSharp11
{
    /// <summary>
    /// Console exercise: reads an HTML file named by the user, extracts every table in it and
    /// writes the cell text to "&lt;input file&gt;.csv" as comma-separated values.
    /// </summary>
    /// <remarks>
    /// The HTML is scanned with regular expressions, not a real parser: nested tables and
    /// rows or cells without closing tags are not handled.
    /// </remarks>
    class Program
    {
        // Singleline lets '.' span line breaks inside an element; IgnoreCase accepts <TD> and <td>.
        private const RegexOptions HtmlOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;

        // The \b after each tag name stops "<th" from also matching "<thead" (and similar).
        private static readonly Regex TableRegex = new Regex(@"<table\b[^>]*>(.*?)</table>", HtmlOptions);
        private static readonly Regex HeaderRegex = new Regex(@"<th\b[^>]*>(.*?)</th>", HtmlOptions);
        private static readonly Regex RowRegex = new Regex(@"<tr\b[^>]*>(.*?)</tr>", HtmlOptions);
        private static readonly Regex CellRegex = new Regex(@"<td\b[^>]*>(.*?)</td>", HtmlOptions);
        private static readonly Regex TagRegex = new Regex(@"<[^>]+>", HtmlOptions);

        // Characters that force a CSV field to be wrapped in double quotes.
        private static readonly char[] CsvSpecialChars = { ',', '"', '\r', '\n' };

        /// <summary>
        /// Program entry point. Prompts for an HTML file name, converts its tables to CSV next
        /// to the input file, then waits for the Escape key.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string file = "";

            Console.WriteLine("Input name of a file with an HTML table in the current directory to convert to csv.");
            Console.Write(":");

            try
            {
                file = Console.ReadLine();

                string html;
                string csvPath;
                using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fs))
                {
                    html = sr.ReadToEnd();
                    csvPath = fs.Name + ".csv";
                }

                // Parse before creating the output so a parse failure leaves no open writer.
                DataSet ds = ConvertHTMLTablesToDataSet(html);

                using (StreamWriter sw = new StreamWriter(csvPath))
                {
                    if (ds != null)
                    {
                        foreach (DataTable table in ds.Tables)
                        {
                            WriteTableAsCsv(sw, table);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of a console tool: report the cause instead of hiding it.
                Console.WriteLine("Could not convert \"{0}\": {1}", file, ex.Message);
            }

            Console.WriteLine("Press esc to exit.");
            while (!keyPressHandler(Console.ReadKey(true)))
            {
                // Console.ReadKey blocks until a key is pressed, so no sleep is needed here.
            }
        }

        /// <summary>
        /// Tells whether the pressed key should end the program.
        /// </summary>
        /// <param name="input">Key information returned by <c>Console.ReadKey</c>.</param>
        /// <returns><c>true</c> when the key is Escape; otherwise <c>false</c>.</returns>
        private static Boolean keyPressHandler(ConsoleKeyInfo input)
        {
            return input.Key == ConsoleKey.Escape;
        }

        /// <summary>
        /// Writes one table as CSV: a header line with the column names, one line per row and
        /// a blank line that separates it from the next table.
        /// </summary>
        /// <param name="writer">Destination for the CSV text.</param>
        /// <param name="table">Table to write.</param>
        private static void WriteTableAsCsv(TextWriter writer, DataTable table)
        {
            int columnCount = table.Columns.Count;
            string[] fields = new string[columnCount];

            for (int i = 0; i < columnCount; i++)
            {
                fields[i] = EscapeCsvField(table.Columns[i].ColumnName);
            }
            writer.WriteLine(string.Join(",", fields));

            foreach (DataRow row in table.Rows)
            {
                for (int i = 0; i < columnCount; i++)
                {
                    // Rows shorter than the table leave DBNull in the missing cells.
                    fields[i] = Convert.IsDBNull(row[i]) ? "" : EscapeCsvField(row[i].ToString());
                }
                writer.WriteLine(string.Join(",", fields));
            }

            writer.WriteLine();
        }

        /// <summary>
        /// Quotes a value for CSV output when it contains a comma, a double quote or a line
        /// break; embedded double quotes are doubled.
        /// </summary>
        /// <param name="field">Raw field text.</param>
        /// <returns>The field, quoted if necessary.</returns>
        private static string EscapeCsvField(string field)
        {
            if (field.IndexOfAny(CsvSpecialChars) < 0)
            {
                return field;
            }
            return "\"" + field.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>
        /// Reduces the inner HTML of a cell to plain text: nested tags are removed, character
        /// entities are decoded and surrounding whitespace is trimmed.
        /// </summary>
        /// <param name="innerHtml">Markup found between the opening and closing cell tags.</param>
        /// <returns>The plain-text cell content.</returns>
        private static string CleanCellText(string innerHtml)
        {
            string withoutTags = TagRegex.Replace(innerHtml, "");
            return WebUtility.HtmlDecode(withoutTags).Trim();
        }

        /// <summary>
        /// Adds a column to <paramref name="table"/>, appending " (2)", " (3)", ... when the
        /// requested name is already taken so duplicate header texts do not throw.
        /// </summary>
        /// <param name="table">Table that receives the column.</param>
        /// <param name="name">Requested column name; an empty name gets the DataTable default name.</param>
        private static void AddColumn(DataTable table, string name)
        {
            string candidate = name;
            int suffix = 2;
            while (candidate.Length > 0 && table.Columns.Contains(candidate))
            {
                candidate = name + " (" + suffix + ")";
                suffix++;
            }
            table.Columns.Add(candidate);
        }

        /// <summary>
        /// Extracts every &lt;table&gt; element from an HTML string into a <see cref="DataSet"/>,
        /// one <see cref="DataTable"/> per HTML table.
        /// </summary>
        /// <param name="HTML">HTML text to scan; null or empty yields an empty data set.</param>
        /// <returns>
        /// A data set (never null). Column names come from the table's &lt;th&gt; cells, or are
        /// "Column 1", "Column 2", ... when the table has none.
        /// </returns>
        private static DataSet ConvertHTMLTablesToDataSet(string HTML)
        {
            DataSet ds = new DataSet();
            if (string.IsNullOrEmpty(HTML))
            {
                return ds;
            }

            foreach (Match tableMatch in TableRegex.Matches(HTML))
            {
                DataTable dt = new DataTable();
                MatchCollection headers = HeaderRegex.Matches(tableMatch.Value);
                MatchCollection rows = RowRegex.Matches(tableMatch.Value);
                bool headersExist = headers.Count > 0;

                if (headersExist)
                {
                    foreach (Match header in headers)
                    {
                        AddColumn(dt, CleanCellText(header.Groups[1].Value));
                    }
                }
                else if (rows.Count > 0)
                {
                    // No header cells: size the table from the first row.
                    int firstRowCells = CellRegex.Matches(rows[0].Value).Count;
                    for (int i = 1; i <= firstRowCells; i++)
                    {
                        AddColumn(dt, "Column " + Convert.ToString(i));
                    }
                }

                foreach (Match rowMatch in rows)
                {
                    MatchCollection cells = CellRegex.Matches(rowMatch.Value);

                    // A row made only of <th> cells is the header row, not data.
                    if (headersExist && cells.Count == 0 && HeaderRegex.IsMatch(rowMatch.Value))
                    {
                        continue;
                    }

                    // Grow the table when a row is wider than the columns found so far.
                    while (dt.Columns.Count < cells.Count)
                    {
                        AddColumn(dt, "Column " + Convert.ToString(dt.Columns.Count + 1));
                    }

                    DataRow dr = dt.NewRow();
                    for (int i = 0; i < cells.Count; i++)
                    {
                        dr[i] = CleanCellText(cells[i].Groups[1].Value);
                    }
                    dt.Rows.Add(dr);
                }

                ds.Tables.Add(dt);
            }

            return ds;
        }
    }
}

posted by dharh 11:42 PM May 29th, 2011


« Previous
1
Next »


2011: 5 3 1
2010: 12 9 7 1
2009: 12 11 8 5
2008: 12 5 4 3 2 1
2007: 12 11 10 9 8 7 6 5 4 3 2 1
2006: 12 11 10 9 8 7 6 5 4 3 2 1
2005: 12 10 7 6
2004: 10 9 6 5 4 3 2 1