using System;

using System.Diagnostics;

using System.Drawing;

using System.Collections;

using System.IO;

using System.ComponentModel;

using System.Windows.Forms;

using mshtml;

using SHDocVw;

using System.Text.RegularExpressions;

using System.Data.OleDb;

namespace WebScraper

{

      /// <summary>

      /// This is a minimalist interface for controlling the financial data scrapers.  It provides

      /// facilities for selecting and initiating scavenging operations, logic for processing the

      /// retrieved data, and limited progress feedback.

      /// </summary>

      public class WebScraper : System.Windows.Forms.Form

      {

            /// <summary>
            /// OLE DB (Jet 4.0) connection string; the constructor assigns it to MarketDBMS.ConnectionString.
            /// Modify the data source path and/or name as necessary.
            /// </summary>
            const string DBMSConnection = @"Provider=Microsoft.Jet.OLEDB.4.0;Password="""";User ID=Admin;Data Source=CorporateData.mdb;Mode=Share Deny None;Extended Properties="""";Jet OLEDB:System database="""";Jet OLEDB:Registry Path="""";Jet OLEDB:Database Password="""";Jet OLEDB:Engine Type=5;Jet OLEDB:Database Locking Mode=0;Jet OLEDB:Global Partial Bulk Ops=2;Jet OLEDB:Global Bulk Transactions=1;Jet OLEDB:New Database Password="""";Jet OLEDB:Create System Database=False;Jet OLEDB:Encrypt Database=False;Jet OLEDB:Don't Copy Locale on Compact=False;Jet OLEDB:Compact Without Replica Repair=False;Jet OLEDB:SFP=False";

            /// <summary>
            /// Name of the file containing the list of ticker symbols; the constructor passes it
            /// to loadTickerSymbols
            /// </summary>
            const string TickerSymbolFile = "tickers.txt";

            /// <summary>
            /// Email address to which bug reports (top level exception catches) should be sent.  Only
            /// applies to release builds.
            /// </summary>
            const string ProgramSupportEmail = "mark@vibrant3d.com";

            /// <summary>
            /// Defines the operating mode of the scraper.  The start button moves the scraper
            /// from Stopped to Started and then toggles between Started and Paused.
            /// </summary>
            private enum ScraperOperatingMode { Stopped,Started,Paused };

            /// <summary>
            /// Defines exactly what the scraper is doing while it is operating: beginning a
            /// statement type, working through the ticker symbols for it, or finishing it
            /// </summary>
            private enum ScraperOperatingState { StartStatement,InStatement,EndStatement };

            /// <summary>
            /// Defines the type of scraped data, which indicates how it will be converted from
            /// textual to internal form.
            /// </summary>
            public enum ScrapeDataType { Dates,Dollars,Numbers};

            /// <summary>
            /// Defines individual types of financial records.  NoStatement is the "none" marker
            /// and NumItems carries the value 6, one past the last real entry.
            /// </summary>
            public enum FinancialStatements {NoStatement=-1,BalanceSheet=0,CashFlows=1,Income=2,Equity=3,Trading=4,UserExpressions = 5,NumItems=6};

            /// <summary>
            /// Defines the global operating mode of the scraper to control behavior in the idle dispatch loop;
            /// doOperatingCycle returns immediately unless this is Started
            /// </summary>
            private ScraperOperatingMode m_scraperOperatingMode = ScraperOperatingMode.Stopped;

            /// <summary>
            /// Defines the internal process state when the scraper is running
            /// </summary>
            private  ScraperOperatingState m_scraperOperatingState;

            /// <summary>
            /// Defines the current statement type being scraped
            /// </summary>
            private FinancialStatements m_scraperActiveStatement;

            /// <summary>
            /// Index into m_symbols of the next ticker symbol to scrape for the current statement type
            /// </summary>
            private int m_scraperRecordIndex;

            /// <summary>
            /// Position in the fixed statement order walked by doOperatingCycle:
            /// 0 = balance sheet, 1 = cash flow, 2 = income statement, 3 = equity information
            /// </summary>
            private int m_scraperStatementIndex;

            /// <summary>
            /// List of ticker symbols (stored as strings) that will drive scraper retrieval operations
            /// </summary>
            private ArrayList m_symbols;

            /// <summary>
            /// Number of records that could not be retrieved in the current pass; cleared at the
            /// start of every statement type
            /// </summary>
            private int m_numFailures;

            /// <summary>
            /// Number of statements the user has selected for scraping
            /// </summary>
            private int m_numStatementsToScrape;

            /// <summary>
            /// Reference to browser instance used to handle HTTP download and DOM text extraction
            /// </summary>
            private InternetExplorer m_browser;

            // Timing note: the time values below are reset for each class of operation,
            // i.e. if you select CashFlow and Equity, the calculations about elapsed and
            // remaining time are done for each item individually, not for your collective
            // selection.  You may elect to modify this.

            /// <summary>
            /// Start of statement scraping cycle
            /// </summary>
            private DateTime m_startScrapeTime;

            /// <summary>
            /// Start of individual item scraping within a statement
            /// </summary>
            private DateTime m_startItemScrapeTime;

            #region Windows Forms Designer created variables
            // Controls and components declared by the Windows Forms designer.  They are
            // presumably instantiated by InitializeComponent (not visible in this part of
            // the file) -- confirm before editing by hand.
            private System.ComponentModel.IContainer components;
            private System.Windows.Forms.GroupBox groupBox1;
            // check boxes choosing which statement types a run will scrape
            private System.Windows.Forms.CheckBox GetBalanceSheet;
            private System.Windows.Forms.CheckBox GetCashFlow;
            private System.Windows.Forms.CheckBox GetEquityInfo;
            private System.Windows.Forms.CheckBox GetIncomeStatement;
            // progress feedback fields refreshed after every scraped record
            private System.Windows.Forms.ProgressBar progressBar1;
            private System.Windows.Forms.Label performanceMsg;
            private System.Windows.Forms.Label CurrentTickerDisplay;
            private System.Windows.Forms.Label CurrentRecordDisplay;
            private System.Windows.Forms.Label TotalRecordsDisplay;
            private System.Windows.Forms.Label TotalRecoveryDisplay;
            private System.Windows.Forms.Label SuccessPercentage;
            private System.Windows.Forms.Label SecondsPerRecord;
            private System.Windows.Forms.Label ElapsedTimeDisplay;
            private System.Windows.Forms.Label RemainingTimeDisplay;
            private System.Windows.Forms.Button startButton;
            private System.Windows.Forms.Button stopButton;
            private System.Windows.Forms.Button exitButton;
            private System.Windows.Forms.Label label3;
            private System.Windows.Forms.Label label1;
            private System.Windows.Forms.Label label2;
            private System.Windows.Forms.Label label5;
            private System.Windows.Forms.Label label4;
            private System.Windows.Forms.Label label6;
            private System.Windows.Forms.Label label9;
            private System.Windows.Forms.ToolTip toolTip1;
            // database connection; its connection string is overridden in the constructor
            private System.Data.OleDb.OleDbConnection MarketDBMS;
            // timer started and stopped around each page download in explorerRead
            private System.Windows.Forms.Timer timer1;
            #endregion

            public WebScraper()

            {

                  //

                  // Required for Windows Form Designer support

                  //

                  InitializeComponent();

                  // clear the count of selected statements

                  m_numStatementsToScrape = 0;

                  // initialize the list of ticker symbols that will be used for retrieving data

                  // get the (potentially) ordered list of ticker symbols

                  ArrayList gd = loadTickerSymbols(TickerSymbolFile);

                  // randomize its contents and use that list to drive retrievals

                  m_symbols = randomizeList(gd);

                  // setup the database connection -  as usual, in their infinite wisdom, microsoft shows poor to nonexistant

                  // skills in factoring their components, i.e. the visual setup for databases ends up binding in host platform

                  // dependencies in exactly the same way as the 'references' mechanism in the solution view.  This line makes it

                  // easy to compensate for their foolishness when moving the software between various development systems

                  this.MarketDBMS.ConnectionString =DBMSConnection;

                  Application.Idle += new System.EventHandler(doOperatingCycle);

            }

            private void startButton_Click(object sender, System.EventArgs e)

            {

                  if(m_scraperOperatingMode == ScraperOperatingMode.Stopped)

                  {

                        // start scavenging operations

                        MarketDBMS.Open();

                        m_browser = new InternetExplorer();

                        m_scraperOperatingMode = ScraperOperatingMode.Started;

                        m_scraperOperatingState = ScraperOperatingState.StartStatement;

                        // reset the state index

                        m_scraperStatementIndex = 0;

                        // update the total # of records display

                        TotalRecordsDisplay.Text = m_symbols.Count.ToString();

                        forceFieldUpdate(TotalRecordsDisplay);

                        this.startButton.Text = "Pause";

                        this.stopButton.Enabled = true;

                  }

                  else if(m_scraperOperatingMode == ScraperOperatingMode.Started)

                  {

                        // pause the scavenging operations

                        this.performanceMsg.Text = "";

                        m_scraperOperatingMode = ScraperOperatingMode.Paused;

                        this.startButton.Text = "Continue";

                  }

                  else if(m_scraperOperatingMode == ScraperOperatingMode.Paused)

                  {

                        // restart the scavenging operation

                        m_scraperOperatingMode = ScraperOperatingMode.Started;

                        this.startButton.Text = "Pause";

                  }

            }

            /// <summary>
            /// Status text passed to updateMessage at the top of every operating cycle;
            /// doOperatingCycle changes it as the scraper moves between phases and statements
            /// </summary>
            private string m_runDisplayMessage = "STARTING";

            /// <summary>

            /// This is the root function that drives the scavenging process.  It's triggered

            /// by the user pressing the start button, and gathers the various information

            /// set in the UI to configure and drive the scavenging process

            /// </summary>

            /// <param name="sender"></param>

            /// <param name="e"></param>

            private void doOperatingCycle(object sender, System.EventArgs e)

            {

                  if(m_scraperOperatingMode != ScraperOperatingMode.Started)

                        return;

                  updateMessage(m_runDisplayMessage);

                  switch(m_scraperOperatingState)

                  {

                        case ScraperOperatingState.StartStatement :

                              m_runDisplayMessage = "STARTING";

                              // clear the recovered record count

                              TotalRecoveryDisplay.Text = "0";

                              forceFieldUpdate(TotalRecoveryDisplay);

                              // reset the recovered records display count and percentage.  This has to be

                              // done here because it only updates on successful recoveries

                              m_startScrapeTime = DateTime.Now;

                              SuccessPercentage.Text = "0";

                              forceFieldUpdate(SuccessPercentage);

                              // clear the internal failure count

                              m_numFailures = 0;

                              m_scraperRecordIndex = 0;

                              switch(m_scraperStatementIndex)

                              {

                                    case 0 :

                                          if(this.GetBalanceSheet.Checked)

                                          {

                                                m_scraperActiveStatement = FinancialStatements.BalanceSheet;

                                                m_scraperOperatingState = ScraperOperatingState.InStatement;

                                                m_runDisplayMessage = "BALANCE SHEET";

                                          }

                                          else

                                                ++m_scraperStatementIndex;

                                          break;

                                    case 1 :

                                          if(this.GetCashFlow.Checked)

                                          {

                                                m_scraperActiveStatement = FinancialStatements.CashFlows;

                                                m_scraperOperatingState = ScraperOperatingState.InStatement;

                                                m_runDisplayMessage = "CASH FLOW";

                                          }

                                          else

                                                ++m_scraperStatementIndex;

                                          break;

                                    case 2 :

                                          if(this.GetIncomeStatement.Checked)

                                          {

                                                m_scraperActiveStatement = FinancialStatements.Income;

                                                m_scraperOperatingState = ScraperOperatingState.InStatement;

                                                m_runDisplayMessage = "INCOME STMNT";

                                          }

                                          else

                                                ++m_scraperStatementIndex;

                                          break;

                                    case 3 :

                                          if(this.GetEquityInfo.Checked)

                                          {

                                                m_scraperActiveStatement = FinancialStatements.Equity;

                                                m_scraperOperatingState = ScraperOperatingState.InStatement;

                                                m_runDisplayMessage = "EQUITY INFO";

                                          }

                                          else

                                                stopScraper();

                                          break;

                              }

                        break;

                        case ScraperOperatingState.InStatement :

                              if(m_scraperRecordIndex >= m_symbols.Count)

                                    m_scraperOperatingState = ScraperOperatingState.EndStatement;

                              else

                                    scrapeFinancialInformation(m_browser,m_scraperActiveStatement,m_scraperRecordIndex++);

                              break;

                        case ScraperOperatingState.EndStatement :

                              m_runDisplayMessage = "FINISH STMNT";

                              if(++m_scraperStatementIndex < 4)

                                    m_scraperOperatingState = ScraperOperatingState.StartStatement;

                              else

                                    stopScraper();

                              break;

                  }

this.Invalidate(false);

            }

            /// <summary>

            /// Top level scavenging method.  This function handles scavenging for all generically parsed records.  It deals with

            /// a single record at a time and is repeatedly dispatched to process all records.  When it completes processing, it

            /// will set the operating state to EndStatement

            /// </summary>

            /// <param name="ie">Browser instance, used to download code and extract text from the DOM</param>

            /// <param name="scrapeSource">Specific data type to be scraped</param>

            private void scrapeFinancialInformation(InternetExplorer ie,FinancialStatements scrapeSource,int recordIndex)

            {

                  bool rslt = processRecord(ie,scrapeSource,(string) m_symbols[recordIndex]);

                  // update the current ticker symbol display

                  updateProgressMonitorFields(recordIndex,rslt);

            }

            /// <summary>

            /// This is the generic top level function that handles scavenging individual company records.

            /// It first determines the correct URL to retrieve data from, and then directs the browser

            /// to this location.  If the browser returns sufficient data to indicate the result was

            /// successful it then attempts to parse the returned data

            /// </summary>

            /// <param name="ie">Reference to web browser instance used to handle basic HTTP connection</param>

            /// <param name="srt">The type of financial statement being processed</param>

            /// <param name="sym">The current symbol being processed</param>

            /// <returns>Boolean indicating whether or not a record was successfully retrieved</returns>

            private bool processRecord(InternetExplorer ie,FinancialStatements srt,string sym)

            {

                  string theURL = "";

                  switch(srt)

                  {

                        case FinancialStatements.BalanceSheet :

                              theURL = balanceSheetURL(sym);

                              break;

                        case FinancialStatements.Income :

                              theURL = incomeStatementURL(sym);

                              break;

                        case FinancialStatements.CashFlows :

                              theURL = cashFlowURL(sym);

                              break;

                        case FinancialStatements.Equity :

                              theURL = equityStatementURL(sym);

                              break;

                  }

                  string theBody = explorerRead(theURL,ie);

                  bool rslt = false;

                  if(theBody != null && theBody.Length > 1500)

                  {

                        switch(srt)

                        {

                              case FinancialStatements.BalanceSheet :

                                    rslt = processBalanceSheetRecord(sym,theBody);

                                    break;

                              case FinancialStatements.Income :

                                    rslt = processIncomeRecord(sym,theBody);

                                    break;

                              case FinancialStatements.CashFlows :

                                    rslt = processCashFlowRecord(sym,theBody);

                                    break;

                              case FinancialStatements.Equity :

                                    rslt = processEquityRecord(sym,theBody);

                                    break;

                        }

                  }

                  return rslt;

            }

            /// <summary>

            /// Updates the display controls during the scavenging process.  This provides

            /// the user with information on the progress of their scavenging operation. It

            /// updates the progress bar, the various record counts, and the elapsed and

            /// remaining time displays

            /// </summary>

            /// <param name="currentIndex">Current record index</param>

            /// <param name="lastResultSuccessful">true if the last record was successfully retrieved</param>

            private void updateProgressMonitorFields(int currentIndex,bool lastResultSuccessful)

            {

                  // update the ticker field display

                  CurrentTickerDisplay.Text = (string) m_symbols[currentIndex];

                  if(lastResultSuccessful)

                        CurrentTickerDisplay.BackColor = Color.LightGreen;

                  else

                        CurrentTickerDisplay.BackColor = Color.LightPink;

                  CurrentTickerDisplay.Invalidate(true);

                  CurrentTickerDisplay.Update();

                  // update the progress display

                  float v1 = (float) currentIndex / (float) m_symbols.Count;

                  progressBar1.Value = (int) (v1 * 1000.0);

                  forceFieldUpdate(progressBar1);

                  // update the current record display

                  CurrentRecordDisplay.Text = currentIndex.ToString();

                  forceFieldUpdate(CurrentRecordDisplay);

                  // update the total # successfully recovered and the success percentage

                  // if the last record was successfully recovered

                  if(lastResultSuccessful)

                  {

                        // success count

                        TotalRecoveryDisplay.Text = (currentIndex + 1 - m_numFailures).ToString();

                        forceFieldUpdate(TotalRecoveryDisplay);

                        // success percentage

                        float pctg = ((float) currentIndex - m_numFailures) / (float) currentIndex;

                        SuccessPercentage.Text = (pctg * 100.0).ToString("0.00");

                        forceFieldUpdate(SuccessPercentage);

                  }

                  else  // count the failure

                        m_numFailures++;

                  // update the per record, elapsed, and remaining times

                  DateTime endItemScrapeTime = DateTime.Now;

                  TimeSpan recordTime = endItemScrapeTime - m_startItemScrapeTime;

                  SecondsPerRecord.Text = (recordTime.TotalMilliseconds / 1000.0).ToString("0.00");

                  forceFieldUpdate(SecondsPerRecord);

                  TimeSpan elapsedTime = endItemScrapeTime - m_startScrapeTime;

                 

                  ElapsedTimeDisplay.Text = timespanString(elapsedTime);

                  forceFieldUpdate(ElapsedTimeDisplay);

                  // computing the remaining time is a guess -  we just use the last record retrieval time

                  // and multiply this by the number of remaining records, but the more adventurous could

                  // implement some type of averaging function to get a more stable estimate

                  TimeSpan estRemTime = new TimeSpan(recordTime.Ticks * (m_symbols.Count - currentIndex));

                  RemainingTimeDisplay.Text = timespanString(estRemTime);

                  forceFieldUpdate(RemainingTimeDisplay);

                  m_startItemScrapeTime = endItemScrapeTime;

            }

            /// <summary>

            /// Generate the url for retrieving a specific balance sheet

            /// </summary>

            /// <param name="sym">Symbol of equity to return data for</param>

            /// <returns>URL of balance sheet for given equity</returns>

            private string balanceSheetURL(string sym)

            {

                  string firstLetter = sym.Substring(0,1);

                  return  "http://biz.yahoo.com/fin/l/" + firstLetter + "/" + sym + "_qb.html";

            }

            /// <summary>

            /// Generate the url for retrieving a specific cash flow statement

            /// </summary>

            /// <param name="sym">Symbol of equity to return data for</param>

            /// <returns>URL of cash flow statement for given equity</returns>

            private string cashFlowURL(string sym)

            {

                  string firstLetter = sym.Substring(0,1);

                  return  "http://biz.yahoo.com/fin/l/" + firstLetter + "/" + sym + "_qc.html";

            }

            /// <summary>

            /// Generate the url for retrieving a specific income statement

            /// </summary>

            /// <param name="sym">Symbol of equity to return data for</param>

            /// <returns>URL of income statement for given equity</returns>

            private string incomeStatementURL(string sym)

            {

                  string firstLetter = sym.Substring(0,1);

                  return "http://biz.yahoo.com/fin/l/" + firstLetter + "/" + sym + ".html";

            }

            /// <summary>

            /// Generate the url for retrieving a specific equity information page

            /// </summary>

            /// <param name="sym">Symbol of equity to return data for</param>

            /// <returns>URL of equity information page</returns>

            private string equityStatementURL(string sym)

            {

                  string firstLetter = sym.Substring(0,1);

                  return  "http://biz.yahoo.com/p/" + firstLetter + "/" + sym + ".html";

            }

            /// <summary>

            /// Method for reading a specific web page and extracting the text content of the page

            /// </summary>

            /// <param name="web_page_address">string giving the URL the browser is to navigate to</param>

            /// <param name="ie">browser instance</param>

            /// <returns>String containing the text contents of the page or null</returns>

            private string explorerRead(string web_page_address,InternetExplorer ie)

            {

                  System.Object nullObject = 0;

                  System.Object nullObjStr = "";

                  try

                  {

                        // set a five second delay on the timer

                        timer1.Interval = 5000;

                        timer1.Start();

                        ie.Navigate(web_page_address,ref nullObject, ref nullObjStr, ref nullObjStr, ref nullObjStr);

                  }

                  catch(Exception e)

                  {

                        timer1.Stop();

                        return null;

                  }

                  int ctr = 0;

                  while( ie.ReadyState!=tagREADYSTATE.READYSTATE_COMPLETE )

                  {

                        Application.DoEvents();

                        System.Threading.Thread.Sleep(100);

                        if(++ctr > 25)

                        {

                              timer1.Stop();

                              return null;

                        }

                  }

                  // stop the timer

                  timer1.Stop();

                  // extract and return the text contents of the retrieved page

                  return (ie.Document as IHTMLDocument2).body.outerText;

            }

            /// <summary>

            /// Setup for generic parsing of balance sheet data and database update with scraped data

            /// </summary>

            /// <param name="sym">Symbol of equity the page data is for</param>

            /// <param name="theBodyIn">The raw page data</param>

            /// <returns>Boolean indicating whether parse and dbms update were successful</returns>

            private bool processBalanceSheetRecord(string sym, string theBodyIn)

            {

                  // match patterns for terminal elements

                  ItemParseDirective[]

                        directives = {

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"POSTDATE",@"\s*Period Ending(?<POSTDATE>.+)",ScrapeDataType.Dates,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CASH",@"\s*Cash And Cash Equivalents(?<CASH>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"STI",@"\s*Short Term Investments(?<STI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"NR",@"\s*Net Receivables(?<NR>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"I",@"\s*Inventory(?<I>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OCA",@"\s*Other Current Assets(?<OCA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"LTI",@"\s*Long Term Investments(?<LTI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"TA",@"\s*Property Plant And Equipment(?<TA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"GW",@"\s*Goodwill(?<GW>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"IA",@"\s*Intangible Assets(?<IA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"AA",@"\s*Accumulated Amortization(?<AA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OA",@"\s*Other Assets(?<OA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DLTAC",@"\s*Deferred Long Term Asset Charges(?<DLTAC>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"AP",@"\s*Payables And Accrued Expenses(?<AP>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DPAY",@"\s*Short Term And Current Long Term Debt(?<DPAY>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OCL",@"\s*Other Current Liabilities(?<OCL>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"LTD",@"\s*Long Term Debt(?<LTD>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OL",@"\s*Other Liabilities(?<OL>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DLTLC",@"\s*Deferred Long Term Liability Charges(?<DLTLC>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"MI",@"\s*Minority Interest(?<MI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"NG",@"\s*Negative Goodwill(?<NG>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OW",@"\s*Misc Stocks Options Warrants(?<OW>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"RPS",@"\s*Redeemable Preferred Stock(?<RPS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"PS",@"\s*Preferred Stock(?<PS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CS",@"\s*Common Stock(?<CS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"RE",@"\s*Retained Earnings(?<RE>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"TS",@"\s*Treasury Stock(?<TS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CAPS",@"\s*Capital Surplus(?<CAPS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OSE",@"\s*Other Stockholder Equity(?<OSE>.+)",ScrapeDataType.Dollars,4),

                  };

                  int numColumsRead = 0;

                  Hashtable bsd =  processGenericRecord(theBodyIn,directives,ref numColumsRead);

                  if(bsd.Count > 0)

                  {

                        writeGenericRecord("QuarterlyBalance",sym,bsd,numColumsRead);

                        return true;

                  }

                  else

                        return false;

            }

            /// <summary>

            /// Setup for generic parsing of cash flow statement data and database update with scraped data

            /// </summary>

            /// <param name="sym">Symbol of equity the page data is for</param>

            /// <param name="theBodyIn">The raw page data</param>

            /// <returns>Boolean indicating whether parse and dbms update were successful</returns>

            private bool processCashFlowRecord(string sym, string theBodyIn)

            {

                  ItemParseDirective[]

                        directives = {

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"POSTDATE",@"\s*Period Ending:(?<POSTDATE>.+)",ScrapeDataType.Dates,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DEPR",@"\s*Depreciation(?<DEPR>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"ADNI",@"\s*Adjustments To Net Income(?<ADNI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CAR",@"\s*Changes In Accounts Receivables(?<CAR>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CL",@"\s*Changes In Liabilities(?<CL>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CI",@"\s*Changes In Inventories(?<CI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"COOA",@"\s*Changes In Other Operating Activities(?<COOA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"CE",@"\s*Capital Expenditures(?<CE>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"INV",@"\s*Investments(?<INV>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OCFIA",@"\s*Other Cashflows From Investing Activities(?<OCFIA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DP",@"\s*Dividends Paid(?<DP>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"SPS",@"\s*Sale Purchase Of Stock(?<SPS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"NB",@"\s*Net Borrowings(?<NB>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OCFFA",@"\s*Other Cashflows From Financing Activities(?<OCFFA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"EER",@"\s*Effect Of Exchange Rate(?<EER>.+)",ScrapeDataType.Dollars,4)

                                           };

                  int numColumsRead = 0;

                  Hashtable cfd = processGenericRecord(theBodyIn,directives,ref numColumsRead);

                  if(cfd.Count > 0)

                  {

                        writeGenericRecord("QuarterlyCashFlow",sym,cfd,numColumsRead);

                        return true;

                  }

                  else

                        return false;

            }

            /// <summary>

            /// Setup for generic parsing of income statement data and database update with scraped data

            /// </summary>

            /// <param name="sym">Symbol of equity the page data is for</param>

            /// <param name="theBodyIn">The raw page data</param>

            /// <returns>Boolean indicating whether parse and dbms update were successful</returns>

            private bool processIncomeRecord(string sym, string theBodyIn)

            {

                  ItemParseDirective[]

                        directives = {

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"POSTDATE",@"\s*Period Ending:(?<POSTDATE>.+)",ScrapeDataType.Dates,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"REVENUES",@"\s*Total Revenue(?<REVENUES>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"COGS",@"\s*Cost Of Revenue(?<COGS>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"RD",@"\s*Research And Development(?<RD>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"SGA",@"\s*Selling General And Administrative Expenses(?<SGA>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"NR",@"\s*Non Recurring(?<NR>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OX",@"\s*Other Operating Expenses(?<OX>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OIN",@"\s*Total Other Income And Expenses Net(?<OIN>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"INTEREST",@"\s*Interest Expense(?<INTEREST>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"IT",@"\s*Income Tax Expense(?<IT>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"NIUC",@"\s*Equity Earnings Or Loss Unconsolidated Subsidiary(?<NIUC>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"MINT",@"\s*Minority Interest(?<MINT>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"DO",@"\s*Discontinued Operations(?<DO>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"EI",@"\s*Extraordinary Items(?<EI>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"AC",@"\s*Effect Of Accounting Changes(?<AC>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"OTHER",@"\s*Other Items(?<OTHER>.+)",ScrapeDataType.Dollars,4),

                                                 new ItemParseDirective(FinancialStatements.BalanceSheet,"PSA",@"\s*Preferred Stock And Other Adjustments(?<PSA>.+)",ScrapeDataType.Dollars,4)

                                           };

                  int numColumsRead = 0;

                  Hashtable id = processGenericRecord(theBodyIn,directives,ref numColumsRead);

                  if(id.Count > 0)

                  {

                        writeGenericRecord("QuarterlyIncome",sym,id,numColumsRead);

                        return true;

                  }

                  else

                        return false;

            }

            /// <summary>

            /// This handles processing of records whose elements of interest are defined by item parse directives

            /// </summary>

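            /// <param name="theBodyIn">The raw page text to scan</param>

            /// <param name="directives">The items to extract, applied in array order; each entry's m_recordID is set to its array index</param>

            /// <param name="columnsRead">Receives the number of dates parsed for the most recent Dates directive, or 0 if no Dates directive matched</param>

            /// <returns>A Hashtable mapping each matched directive's m_key to its parsed values (DateTime[] for Dates, double[] for Dollars and Numbers)</returns>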

            private Hashtable processGenericRecord(string theBodyIn,ItemParseDirective[] directives,ref int columnsRead)

            {

                  string theBody = theBodyIn;

                  theBody = theBody.Replace("\r","\r\n");

                  int searchoffset = 0;

                  string[] rslts;

                  string[] vars = new String[1];

                  Hashtable finalOutput = new Hashtable();

                  columnsRead = 0;

                  for(int i = 0;i < directives.Length;i++)

                  {

                        directives[i].m_recordID = i;

                        vars[0] = directives[i].m_key;

                        rslts = this.grabData(theBody,directives[i].m_pattern,vars,ref searchoffset);

                        if(rslts != null)

                        {

                              // parse the individual results

                              // note that columnsRead is updated ONLY from the parseDateData result.  This is because this

                              // routine is not prone to error, given the format of dates.  This logic does presuppose that

                              // we're always reading in date information, however

                              switch(directives[i].m_dataType)

                              {

                                    case ScrapeDataType.Dates : 

                                          finalOutput[directives[i].m_key] = parseDateData(rslts[0].Trim(),directives[i].m_numColumns);

                                          columnsRead = ((DateTime[]) finalOutput[directives[i].m_key]).Length;

                                          break;

                                    case ScrapeDataType.Dollars : 

                                          finalOutput[directives[i].m_key] = parseNumericData(rslts[0].Trim(),directives[i].m_numColumns);

                                          break;

                                    case ScrapeDataType.Numbers : 

                                          finalOutput[directives[i].m_key] = parseNumericData(rslts[0].Trim(),directives[i].m_numColumns);

                                          break;

                              }

                        }

                  }                

                  return finalOutput;

            }

            /// <summary>

            /// Custom parser for equity information.  This is an unrolled hand modified version of the generic parser

            /// that shows a recently developed parser for some specialized information.  Once the parsing logic works

            /// and is stable, this is a candidate for being rewritten as another general parser, i.e. the logic is broken

            /// down into support methods and an ItemParseDirectives table

            /// </summary>

            /// <param name="sym">The key for the record, which will be used when writing it to the database</param>

            /// <param name="theBodyIn">The raw text recovered from the web page</param>

            /// <returns></returns>

            private  bool processEquityRecord(string sym,string theBodyIn)

            {

                  string theBody = theBodyIn;

                  theBody = theBody.Replace("\r","\r\n");

                  int searchoffset = 0;

                  string empPtn = @"\s*Employees.+:(?<emp>.+)";

                  string sctPtn = @"\s*Sector:(?<sct>.+)";

                  string instPtn = @"\s*Institutional:(?<inst>\s*[0-9]+)%";

                  string soPtn = @"Shares Outstanding(?<so>.+)";

                  string lsPtn = @"Last Split(?<ls>.+)";

                  string ftPtn = @"Float(?<ft>.+)";

                  string stPtn = @"Shares Short(?<ss>.+)";

                  string[] rslts;

                  string empStr,sctStr,instStr,lsStr,soStr,ftStr,ssStr;

                  string[] vars = new String[1];

                  vars[0] = "emp";rslts = this.grabData(theBody,empPtn,vars,ref searchoffset);

                  if(rslts != null)

                        empStr = rslts[0].Trim();

                  else

                        empStr = "1";

                  vars[0] = "sct";rslts = this.grabData(theBody,sctPtn,vars,ref searchoffset);

                  if(rslts != null)

                        sctStr = rslts[0].Trim();

                  else

                        sctStr = "Unknown";

                  vars[0] = "inst";rslts = this.grabData(theBody,instPtn,vars,ref searchoffset);

                  if(rslts != null)

                        instStr = rslts[0].Trim();

                  else

                        instStr = "0";

                  vars[0] = "so";rslts = this.grabData(theBody,soPtn,vars,ref searchoffset);

                  if(rslts != null)

                        soStr = rslts[0].Trim();

                  else

                        soStr = "0";

                  vars[0] = "ls";rslts = this.grabData(theBody,lsPtn,vars,ref searchoffset);

                  if(rslts != null)

                        lsStr = rslts[0].Trim();

                  else

                        lsStr = "NONE";

                  vars[0] = "ft";rslts = this.grabData(theBody,ftPtn,vars,ref searchoffset);

                  if(rslts != null)

                        ftStr = rslts[0].Trim();

                  else

                        ftStr = "0";

                  vars[0] = "ss";rslts = this.grabData(theBody,stPtn,vars,ref searchoffset);

                  if(rslts != null)

                        ssStr = rslts[0].Trim();

                  else

                        ssStr = "0";

                  // convert the financial values

                  float numEmpVal = new FinancialValue(empStr).Value;

                  float instOwnVal = (float) (new FinancialValue(instStr).Value / 100.0);

                  float sharesOutVal = new FinancialValue(soStr).Value;

                  float floatVal = (float) new FinancialValue(ftStr).Value;

                  float shortVal = new FinancialValue(ssStr).Value;

                  // deal with any reported split

                  DateTime splitDate = new DateTime(1970,1,1);

                  float splitFactor = 1;

                  if(lsStr.ToUpper() != "NONE")

                  {

                        int lsso = 0;

                        string facPtn = @"factor\s(?<fct>.+)\son\s(?<dat>.+)";

                        vars = new String[2];

                        vars[0] = "fct";vars[1] = "dat";

                        rslts = this.grabData(lsStr,facPtn,vars,ref lsso);

                        string fctStr = rslts[0].Trim();

                        string datStr = rslts[1].Trim();

                        splitFactor = new FinancialValue(fctStr).Value;

                        splitDate = DateTime.Parse(datStr);

                  }

                  // update the database

                  // we know this is a hack for the moment, and we're providing records for

                  // 1/1/2000,4/1/2000,7/1/200,10/1/2000 ... 2001 (4) 2002 (4)

                  DateTime writeDate = DateTime.Now;

                  DateTime limDate = new DateTime(2003,1,1);

                 

                  while(writeDate < limDate)

                  {

                        DateTime nxtWriteDate = writeDate.AddMonths(3);

                        float dtoffset = -1;

                        float sout;

                        if(writeDate > splitDate)

                              sout = sharesOutVal;

                        else

                        {

                              sout = sharesOutVal / splitFactor;

                              if(nxtWriteDate > splitDate)

                              {

                                    System.TimeSpan ts = splitDate - writeDate;

                                    dtoffset = ts.Days;

                              }

                        }

                        writeCustomEquityRecord(sym,writeDate,dtoffset,numEmpVal,instOwnVal,sout,floatVal,shortVal,sctStr);

                        writeDate = nxtWriteDate;

                  }

                  return true;

            }

            /// <summary>

            /// Convert one or more dates in a string to their corresponding internal form

            /// </summary>

            /// <param name="datastring">The raw string of dates</param>

            /// <param name="nCols">The number of dates in the string</param>

            /// <returns>A DateTime array containing the dates parsed into internal form</returns>

            private DateTime[] parseDateData(string datastring,int nCols)

            {

                  int len = datastring.Length;

                  System.Text.RegularExpressions.Regex re = new System.Text.RegularExpressions.Regex(@"\w+\s+\d+,\s+\d+");

                  MatchCollection theMatch = re.Matches(datastring);

                  if(theMatch.Count == 0)

                        return null;

                  DateTime[] a = new DateTime[theMatch.Count];

                  for(int i = 0;i < theMatch.Count;i++)

                        a[i] = DateTime.Parse(theMatch[i].Value);

                  return a;

            }

            /// <summary>

            /// Convert one or more numeric values, possibly with various financial symbols or specialized markers, to numeric information

            /// </summary>

            /// <param name="datastring">The raw data to parse</param>

            /// <param name="nCols">The number of distinct items expected</param>

            /// <returns>An array of doubles representing the data recovered</returns>

            private double[] parseNumericData(string datastring,int nCols)

            {

                  System.Text.RegularExpressions.Regex re;

                  re = new System.Text.RegularExpressions.Regex("N/A");

                  int offset = 0;

                  bool isNegative = false;

                  bool usingParens = false;

                  bool dollarValue = false;

                  double[] results = new double[nCols];

                  // match N/A

                  //          [(][$]digits,commas[)]

                 

                  string formnum = "";

                  for(int i = 0;i < nCols;i++)

                  {

                        if(offset >= datastring.Length)

                        {

                              results[i] = 0.0;

                              continue;

                        }

                        if(datastring[offset] == '(')

                        {

                              isNegative = true;

                              usingParens = true;

                              offset++;

                        }

                        else if(datastring[offset] == '-')

                        {

                              isNegative = true;

                              usingParens = false;

                              offset++;

                        }

                        else if(datastring[offset] == 'N')

                        {

                              if(datastring[offset + 1] == '/' && datastring[offset + 2] == 'A')

                              {

                                    results[i] = 0.0;

                                    offset += 3;

                                    continue;

                              }

                        }

                        if(datastring[offset] == '$')

                        {

                              dollarValue = true;

                              offset++;

                        }

                        // now iterate forward to pick up the number.  It may contain commas and decimal points

                        // it may be neccessary to end on the first illegal char, or because the comma ordering

                        // is wrong

                        int lastcomma = -1;

                        bool keepscanning = true;

                        while(keepscanning)

                        {

                              if(offset >= datastring.Length || ((dollarValue && datastring[offset] == '$') || datastring[offset] == '(' || datastring[offset] == 'N'))

                              {

                                    keepscanning = false;

                                    continue;

                              }

                              if(datastring[offset] == ',')

                              {

                                    if(lastcomma != -1)

                                          if(offset - lastcomma > 4)

                                          {

                                                // need to figure this out still, theres two numbers run together

                                                Debug.Assert(false);

                                          }

                                    lastcomma = offset++;

                              }

                              else if(datastring[offset] == '.')

                              {

                                    Debug.Assert(false);

                              }

                              else if(datastring[offset] >= '0' && datastring[offset] <= '9')

                              {

                                    formnum = formnum + datastring[offset++];

                              }

                              else if(datastring[offset] == ')' && usingParens)

                              {

                                    ++offset;

                                    keepscanning = false;

                                    continue;

                              }

                              else

                              {

                                    // wtf

                                    Debug.Assert(false);

                              }

                        }

                        results[i] = double.Parse(formnum) * (isNegative ? -1.0 : 1.0);

                        formnum = "";

                  }

                  return results;

            }

            /// <summary>

            /// This is the generic mechanism used by all parsing components, generic and custom, to extract

            /// a single item from the source text being processed.  It applies the regular expression to the

            /// text from the given offset onward and returns the value of each named group listed in 'vars'.

            /// </summary>

            /// <param name="bulkText">The text to be scanned for the pattern</param>

            /// <param name="keyString">The regular expression used to define the pattern</param>

            /// <param name="vars">The names of the regular expression groups whose matched text is to be returned</param>

            /// <param name="offset">The location to start scanning, passed as a reference and, when a match is found,

            /// updated to the next unscanned position on return</param>

            /// <returns>The extracted group values, in the same order as 'vars', or null if the pattern did not match</returns>

            private string[] grabData(string bulkText,string keyString,string[] vars,ref int offset)

            {

                  System.Text.RegularExpressions.Regex re = new System.Text.RegularExpressions.Regex(keyString);

                  string substr = bulkText.Substring(offset);

                  Match theMatch = re.Match(bulkText,offset,bulkText.Length - offset);

                  if(theMatch == null)

                        return null;

                  if(!theMatch.Success)

                        return null;

                  string[] rslt = new String[vars.Length];

                  for(int i = 0;i < vars.Length;i++)

                        rslt[i] = theMatch.Result("${" + vars[i] + "}");

                  offset = theMatch.Index + theMatch.Length;

                  return rslt;

            }

            /// <summary>

            /// Write a record to the database based on the results of a generic parsing operation

            /// </summary>

            /// <param name="theTable">Name of the table to write the data to</param>

            /// <param name="sym">The ticker symbol used as a unique key for the record</param>

            /// <param name="databucket">The set of item and value pairs</param>

            private void writeGenericRecord(string theTable,string sym,Hashtable databucket,int columnsRead)

            {

                  string[] kz = new String[databucket.Count];

                  IDictionaryEnumerator de = databucket.GetEnumerator();

                 

                  // generate the parameterized SQL INSERT statement

                  int ki = 0;

                  while(de.MoveNext())

                        kz[ki++] = (string) de.Key;

                  string myInsertQuery = "INSERT INTO " + theTable + " ";

                  string myValStr = "";

                  for(int i = 0;i < kz.Length;i++)

                  {

                        if(i == 0)

                        {

                              myInsertQuery = myInsertQuery+ " (SYMBOL,";

                              myValStr = myValStr + "Values(\"" + sym + "\",";

                        }

                        else

                        {

                              myInsertQuery = myInsertQuery + ",";

                              myValStr = myValStr + ",";

                        }

                        myInsertQuery = myInsertQuery + kz[i];

                        myValStr = myValStr + "@" + kz[i].ToLower();

                  }

                 

                  myInsertQuery = myInsertQuery + ")" + myValStr + ")";

                  // set parameter values and perform database update for each information column provided

                  for(int c = 0;c < columnsRead;c++)

                  {

                        System.Data.OleDb.OleDbCommand myCommand = new System.Data.OleDb.OleDbCommand(myInsertQuery);

                        for(int i = 0;i < kz.Length;i++)

                        {

                              string ky = "@" + kz[i].ToLower();

                              if(kz[i].Equals("POSTDATE") && !(databucket[kz[i]] is string))

                              {

                                    DateTime[] dt = (DateTime[]) databucket[kz[i]];

                                    DateTime theDate = dt[c];

                                    Debug.Assert(theDate > new DateTime(1970,1,1));

                                    myCommand.Parameters.Add(ky,theDate.ToShortDateString());

                              }

                              else if(!(databucket[kz[i]] is string))

                              {

                                    double[] fd = (double[]) databucket[kz[i]];

                                    double thefloat = fd[c];

                                    myCommand.Parameters.Add(ky,thefloat.ToString());

                              }

                              else

                              {

                                    string p = (string) databucket[kz[i]];

                                    myCommand.Parameters.Add(ky,p);

                              }

                        }

                        try

                        {

                              myCommand.Connection = MarketDBMS;

                              myCommand.ExecuteNonQuery();

                        }

                        catch(OleDbException e)

                        {

                              // silently ignore duplicate record errors

                              if(e.ErrorCode == -2147467259)

                                    continue;

                              string msg = e.ToString();

                              if(MessageBox.Show (msg, "Database Error",MessageBoxButtons.OKCancel, MessageBoxIcon.Asterisk) == DialogResult.Cancel)

                                    Application.Exit();

                        }

                        catch(Exception e)

                        {

                             

                              string msg = e.ToString();

                              if(MessageBox.Show (msg, "Generic Error",MessageBoxButtons.OKCancel, MessageBoxIcon.Asterisk) == DialogResult.Cancel)

                                    Application.Exit();

                        }

                  }

            }

            /// <summary>

            /// Write a custom record to the database containing extracted equity information.  Over time, this

            /// logic can be rewritten to use the generic record writing method (writeGenericRecord)

            /// </summary>

            /// <param name="sym">The unique ticker symbol to use as a part of the record key</param>

            /// <param name="forDate">The date to use as the other part of the record key</param>

            /// <param name="chgDateOffset">A floating point representation of the date change</param>

            /// <param name="empcount">Number of employees</param>

            /// <param name="instown">Institutional ownership percentage</param>

            /// <param name="sout">Shares outstanding</param>

            /// <param name="thefloat">Share Float</param>

            /// <param name="shorts">Number of short sales</param>

            /// <param name="sector">Sector, not currently written</param>

            private void writeCustomEquityRecord(string sym,DateTime forDate,float chgDateOffset,float empcount,float instown,float sout,float thefloat,float shorts,string sector)

            {

                  string myInsertQuery =

                        "INSERT INTO QuarterlyEquity (SYMBOL,POSTDATE,CHGDATE,SHARESOUT,FLOATOUT,SHORTED,EMPLOYEES,INSTOWN) Values(@sym,@pdt,@cdt,@sot,@flt,@srt,@emp,@ion)";

                  System.Data.OleDb.OleDbCommand myCommand = new System.Data.OleDb.OleDbCommand(myInsertQuery);

                  myCommand.Parameters.Add("@sym",sym);

                  myCommand.Parameters.Add("@pdt",forDate.ToShortDateString());

                  myCommand.Parameters.Add("@cdt",chgDateOffset.ToString());

                  myCommand.Parameters.Add("@sot",sout.ToString());

                  myCommand.Parameters.Add("@flt",thefloat.ToString());

                  myCommand.Parameters.Add("@srt",shorts.ToString());

                  myCommand.Parameters.Add("@emp",empcount.ToString());

                  myCommand.Parameters.Add("@ion",instown.ToString());

                  myCommand.Connection = MarketDBMS;

                  try

                  {

                        myCommand.ExecuteNonQuery();

                  }

                  catch(OleDbException e)

                  {

                        // silently ignore duplicate record errors

                        if(e.ErrorCode == -2147467259)

                              return;

                        string msg = e.ToString();

                        if(MessageBox.Show (msg, "Database Error",MessageBoxButtons.OKCancel, MessageBoxIcon.Asterisk) == DialogResult.Cancel)

                              Application.Exit();

                  }

                  catch(Exception e)

                  {

                             

                        string msg = e.ToString();

                        if(MessageBox.Show (msg, "Generic Error",MessageBoxButtons.OKCancel, MessageBoxIcon.Asterisk) == DialogResult.Cancel)

                              Application.Exit();

                  }

            }

            /// <summary>

            /// User interface support method used to express a timespan in a string representing hours, minutes, and seconds

            /// </summary>

            /// <param name="theSpan">Timespan to convert to a string</param>

            /// <returns>String in HH:MM:SS form</returns>

            private string timespanString(TimeSpan theSpan)

            {

                  int hrs = theSpan.Hours;

                  int mins = theSpan.Minutes;

                  int sec = theSpan.Seconds;

                  return hrs.ToString("00") + ":" + mins.ToString("00") + ":" + sec.ToString("00");

            }    

     

            /// <summary>

            /// Ensure that a field updates its visible display contents right now.  The fact this function exists at

            /// all is deplorable, as C# should provide some facility for constructing basic macros for operations

            /// like this.  As the C# doesn't have a preprocessor for reasons I can't begin to fathom, that isn't an

            /// option.

            /// </summary>

            /// <param name="c">The control that is to be updated</param>

            private void forceFieldUpdate(Control c)

            {

                  c.Invalidate();c.Update();

            }

            private int m_statusScrollOffset = 0;

            private void updateMessage(string msgtext)

            {

                  int padlen = 60 - msgtext.Length;

                  string fullmsg = msgtext.PadLeft(padlen - m_statusScrollOffset,' ').PadRight(m_statusScrollOffset);

                  performanceMsg.Text = fullmsg;

                  if(++m_statusScrollOffset + msgtext.Length >= 60)

                        m_statusScrollOffset = 0;

            }

            /// <summary>

            /// Clean up any resources being used.

            /// </summary>

            protected override void Dispose( bool disposing )

            {

                  if( disposing )

                  {

                        if(components != null)

                        {

                              components.Dispose();

                        }

                  }

                  base.Dispose( disposing );

            }

            #region Windows Form Designer generated code

            /// <summary>

            /// Required method for Designer support - do not modify

            /// the contents of this method with the code editor.

            /// It instantiates every control and component used by the form, assigns their position,

            /// size, text and tooltips, attaches the event handlers, and adds the controls to the form.

            /// </summary>

            private void InitializeComponent()

            {

                  // container for the components that take one in their constructor (timer1, toolTip1)

                  this.components = new System.ComponentModel.Container();

                  this.CurrentTickerDisplay = new System.Windows.Forms.Label();

                  this.progressBar1 = new System.Windows.Forms.ProgressBar();

                  this.performanceMsg = new System.Windows.Forms.Label();

                  this.startButton = new System.Windows.Forms.Button();

                  // the connection object is only created here; no connection string is assigned in this method

                  this.MarketDBMS = new System.Data.OleDb.OleDbConnection();

                  this.timer1 = new System.Windows.Forms.Timer(this.components);

                  this.groupBox1 = new System.Windows.Forms.GroupBox();

                  this.GetBalanceSheet = new System.Windows.Forms.CheckBox();

                  this.GetCashFlow = new System.Windows.Forms.CheckBox();

                  this.GetEquityInfo = new System.Windows.Forms.CheckBox();

                  this.GetIncomeStatement = new System.Windows.Forms.CheckBox();

                  this.toolTip1 = new System.Windows.Forms.ToolTip(this.components);

                  this.CurrentRecordDisplay = new System.Windows.Forms.Label();

                  this.TotalRecordsDisplay = new System.Windows.Forms.Label();

                  this.TotalRecoveryDisplay = new System.Windows.Forms.Label();

                  this.SuccessPercentage = new System.Windows.Forms.Label();

                  this.SecondsPerRecord = new System.Windows.Forms.Label();

                  this.ElapsedTimeDisplay = new System.Windows.Forms.Label();

                  this.RemainingTimeDisplay = new System.Windows.Forms.Label();

                  this.stopButton = new System.Windows.Forms.Button();

                  this.exitButton = new System.Windows.Forms.Button();

                  this.label2 = new System.Windows.Forms.Label();

                  this.label1 = new System.Windows.Forms.Label();

                  this.label3 = new System.Windows.Forms.Label();

                  this.label5 = new System.Windows.Forms.Label();

                  this.label4 = new System.Windows.Forms.Label();

                  this.label6 = new System.Windows.Forms.Label();

                  this.label9 = new System.Windows.Forms.Label();

                  // layout is suspended while the controls are configured and resumed at the end of the method

                  this.groupBox1.SuspendLayout();

                  this.SuspendLayout();

                  //

                  // CurrentTickerDisplay

                  //

                  this.CurrentTickerDisplay.Location = new System.Drawing.Point(8, 200);

                  this.CurrentTickerDisplay.Name = "CurrentTickerDisplay";

                  this.CurrentTickerDisplay.Size = new System.Drawing.Size(48, 23);

                  this.CurrentTickerDisplay.TabIndex = 0;

                  this.CurrentTickerDisplay.Text = "TICKER";

                  this.CurrentTickerDisplay.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;

                  this.toolTip1.SetToolTip(this.CurrentTickerDisplay, "Current record ticker symbol");

                  //

                  // progressBar1 (its upper bound is set to 1000)

                  //

                  this.progressBar1.Location = new System.Drawing.Point(56, 200);

                  this.progressBar1.Maximum = 1000;

                  this.progressBar1.Name = "progressBar1";

                  this.progressBar1.Size = new System.Drawing.Size(232, 23);

                  this.progressBar1.TabIndex = 1;

                  this.toolTip1.SetToolTip(this.progressBar1, "Record download progress display");

                  //

                  // performanceMsg (the label updateMessage and stopScraper write status text to)

                  //

                  this.performanceMsg.Location = new System.Drawing.Point(16, 240);

                  this.performanceMsg.Name = "performanceMsg";

                  this.performanceMsg.Size = new System.Drawing.Size(272, 18);

                  this.performanceMsg.TabIndex = 0;

                  this.toolTip1.SetToolTip(this.performanceMsg, "Informational Messages");

                  //

                  // startButton (starts out disabled; GetFinancialStatement_CheckedChanged and

                  // stopScraper decide when it becomes enabled)

                  //

                  this.startButton.Enabled = false;

                  this.startButton.Location = new System.Drawing.Point(16, 272);

                  this.startButton.Name = "startButton";

                  this.startButton.TabIndex = 3;

                  this.startButton.Text = "Start";

                  this.toolTip1.SetToolTip(this.startButton, "Start/Pause Scraping");

                  this.startButton.Click += new System.EventHandler(this.startButton_Click);

                  //

                  // timer1 (only the Tick handler is attached here; the timer is neither enabled

                  // nor given an interval in this method)

                  //

                  this.timer1.Tick += new System.EventHandler(this.timer1_Tick);

                  //

                  // groupBox1 (hosts the four statement selection check boxes)

                  //

                  this.groupBox1.Controls.AddRange(new System.Windows.Forms.Control[] {

                                                                                                                              this.GetBalanceSheet,

                                                                                                                              this.GetCashFlow,

                                                                                                                              this.GetEquityInfo,

                                                                                                                              this.GetIncomeStatement});

                  this.groupBox1.Location = new System.Drawing.Point(8, 8);

                  this.groupBox1.Name = "groupBox1";

                  this.groupBox1.Size = new System.Drawing.Size(272, 88);

                  this.groupBox1.TabIndex = 5;

                  this.groupBox1.TabStop = false;

                  this.groupBox1.Text = "Select Data for Scavenging";

                  //

                  // GetBalanceSheet (all four check boxes share the

                  // GetFinancialStatement_CheckedChanged handler)

                  //

                  this.GetBalanceSheet.Location = new System.Drawing.Point(16, 24);

                  this.GetBalanceSheet.Name = "GetBalanceSheet";

                  this.GetBalanceSheet.TabIndex = 0;

                  this.GetBalanceSheet.Text = "Balance Sheet";

                  this.toolTip1.SetToolTip(this.GetBalanceSheet, "Get Balance Sheet Information");

                  this.GetBalanceSheet.CheckedChanged += new System.EventHandler(this.GetFinancialStatement_CheckedChanged);

                  //

                  // GetCashFlow

                  //

                  this.GetCashFlow.Location = new System.Drawing.Point(136, 24);

                  this.GetCashFlow.Name = "GetCashFlow";

                  this.GetCashFlow.Size = new System.Drawing.Size(120, 24);

                  this.GetCashFlow.TabIndex = 0;

                  this.GetCashFlow.Text = "Cash Flow";

                  this.toolTip1.SetToolTip(this.GetCashFlow, "Get Cash Flow Statement Information");

                  this.GetCashFlow.CheckedChanged += new System.EventHandler(this.GetFinancialStatement_CheckedChanged);

                  //

                  // GetEquityInfo

                  //

                  this.GetEquityInfo.Location = new System.Drawing.Point(16, 56);

                  this.GetEquityInfo.Name = "GetEquityInfo";

                  this.GetEquityInfo.TabIndex = 0;

                  this.GetEquityInfo.Text = "Equity Info";

                  this.toolTip1.SetToolTip(this.GetEquityInfo, "Get Equity Information");

                  this.GetEquityInfo.CheckedChanged += new System.EventHandler(this.GetFinancialStatement_CheckedChanged);

                  //

                  // GetIncomeStatement

                  //

                  this.GetIncomeStatement.Location = new System.Drawing.Point(136, 56);

                  this.GetIncomeStatement.Name = "GetIncomeStatement";

                  this.GetIncomeStatement.Size = new System.Drawing.Size(120, 24);

                  this.GetIncomeStatement.TabIndex = 0;

                  this.GetIncomeStatement.Text = "Income Statement";

                  this.toolTip1.SetToolTip(this.GetIncomeStatement, "Get Income Statement Information");

                  this.GetIncomeStatement.CheckedChanged += new System.EventHandler(this.GetFinancialStatement_CheckedChanged);

                  //

                  // CurrentRecordDisplay

                  //

                  this.CurrentRecordDisplay.Location = new System.Drawing.Point(16, 104);

                  this.CurrentRecordDisplay.Name = "CurrentRecordDisplay";

                  this.CurrentRecordDisplay.Size = new System.Drawing.Size(64, 16);

                  this.CurrentRecordDisplay.TabIndex = 6;

                  this.CurrentRecordDisplay.Text = "0";

                  this.CurrentRecordDisplay.TextAlign = System.Drawing.ContentAlignment.MiddleRight;

                  this.toolTip1.SetToolTip(this.CurrentRecordDisplay, "Number of records checked");

                  //

                  // TotalRecordsDisplay

                  //

                  this.TotalRecordsDisplay.Location = new System.Drawing.Point(128, 104);

                  this.TotalRecordsDisplay.Name = "TotalRecordsDisplay";

                  this.TotalRecordsDisplay.Size = new System.Drawing.Size(72, 16);

                  this.TotalRecordsDisplay.TabIndex = 6;

                  this.TotalRecordsDisplay.Text = "0";

                  this.TotalRecordsDisplay.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;

                  this.toolTip1.SetToolTip(this.TotalRecordsDisplay, "Total number of records to check");

                  //

                  // TotalRecoveryDisplay

                  //

                  this.TotalRecoveryDisplay.Location = new System.Drawing.Point(16, 128);

                  this.TotalRecoveryDisplay.Name = "TotalRecoveryDisplay";

                  this.TotalRecoveryDisplay.Size = new System.Drawing.Size(64, 16);

                  this.TotalRecoveryDisplay.TabIndex = 6;

                  this.TotalRecoveryDisplay.Text = "0";

                  this.TotalRecoveryDisplay.TextAlign = System.Drawing.ContentAlignment.MiddleRight;

                  this.toolTip1.SetToolTip(this.TotalRecoveryDisplay, "Number of records successfully updated");

                  //

                  // SuccessPercentage

                  //

                  this.SuccessPercentage.Location = new System.Drawing.Point(168, 128);

                  this.SuccessPercentage.Name = "SuccessPercentage";

                  this.SuccessPercentage.Size = new System.Drawing.Size(48, 16);

                  this.SuccessPercentage.TabIndex = 6;

                  this.SuccessPercentage.Text = "0";

                  this.SuccessPercentage.TextAlign = System.Drawing.ContentAlignment.MiddleRight;

                  this.toolTip1.SetToolTip(this.SuccessPercentage, "% Successfully Retrieved");

                  //

                  // SecondsPerRecord

                  //

                  this.SecondsPerRecord.Location = new System.Drawing.Point(96, 184);

                  this.SecondsPerRecord.Name = "SecondsPerRecord";

                  this.SecondsPerRecord.Size = new System.Drawing.Size(32, 16);

                  this.SecondsPerRecord.TabIndex = 6;

                  this.SecondsPerRecord.Text = "0.00";

                  this.toolTip1.SetToolTip(this.SecondsPerRecord, "Time to download a single record");

                  //

                  // ElapsedTimeDisplay

                  //

                  this.ElapsedTimeDisplay.Location = new System.Drawing.Point(16, 160);

                  this.ElapsedTimeDisplay.Name = "ElapsedTimeDisplay";

                  this.ElapsedTimeDisplay.Size = new System.Drawing.Size(72, 16);

                  this.ElapsedTimeDisplay.TabIndex = 6;

                  this.ElapsedTimeDisplay.Text = "00.00.00";

                  this.toolTip1.SetToolTip(this.ElapsedTimeDisplay, "Current Run Time of Scraper");

                  //

                  // RemainingTimeDisplay

                  //

                  this.RemainingTimeDisplay.Location = new System.Drawing.Point(152, 160);

                  this.RemainingTimeDisplay.Name = "RemainingTimeDisplay";

                  this.RemainingTimeDisplay.Size = new System.Drawing.Size(64, 16);

                  this.RemainingTimeDisplay.TabIndex = 6;

                  this.RemainingTimeDisplay.Text = "00.00.00";

                  this.toolTip1.SetToolTip(this.RemainingTimeDisplay, "Estimated Remaining Time for current record");

                  //

                  // stopButton (starts out disabled)

                  //

                  this.stopButton.Enabled = false;

                  this.stopButton.Location = new System.Drawing.Point(104, 272);

                  this.stopButton.Name = "stopButton";

                  this.stopButton.TabIndex = 3;

                  this.stopButton.Text = "Stop";

                  this.toolTip1.SetToolTip(this.stopButton, "Stop Scraping and Reset");

                  this.stopButton.Click += new System.EventHandler(this.stopButton_Click);

                  //

                  // exitButton

                  //

                  this.exitButton.Location = new System.Drawing.Point(200, 272);

                  this.exitButton.Name = "exitButton";

                  this.exitButton.TabIndex = 3;

                  this.exitButton.Text = "Exit";

                  this.toolTip1.SetToolTip(this.exitButton, "Exit Scraper");

                  this.exitButton.Click += new System.EventHandler(this.exitButton_Click);

                  //

                  // label2 (the remaining labelN controls are fixed captions placed beside the counters)

                  //

                  this.label2.Location = new System.Drawing.Point(96, 104);

                  this.label2.Name = "label2";

                  this.label2.Size = new System.Drawing.Size(24, 16);

                  this.label2.TabIndex = 6;

                  this.label2.Text = "of";

                  //

                  // label1

                  //

                  this.label1.Location = new System.Drawing.Point(208, 104);

                  this.label1.Name = "label1";

                  this.label1.Size = new System.Drawing.Size(64, 16);

                  this.label1.TabIndex = 6;

                  this.label1.Text = "processed.";

                  //

                  // label3

                  //

                  this.label3.Location = new System.Drawing.Point(88, 128);

                  this.label3.Name = "label3";

                  this.label3.Size = new System.Drawing.Size(64, 16);

                  this.label3.TabIndex = 6;

                  this.label3.Text = "recovered";

                  //

                  // label5

                  //

                  this.label5.Location = new System.Drawing.Point(232, 128);

                  this.label5.Name = "label5";

                  this.label5.Size = new System.Drawing.Size(72, 16);

                  this.label5.TabIndex = 6;

                  this.label5.Text = "% success";

                  //

                  // label4

                  //

                  this.label4.Location = new System.Drawing.Point(136, 184);

                  this.label4.Name = "label4";

                  this.label4.Size = new System.Drawing.Size(64, 16);

                  this.label4.TabIndex = 6;

                  this.label4.Text = "secs/record";

                  //

                  // label6

                  //

                  this.label6.Location = new System.Drawing.Point(96, 160);

                  this.label6.Name = "label6";

                  this.label6.Size = new System.Drawing.Size(48, 16);

                  this.label6.TabIndex = 6;

                  this.label6.Text = "elapsed";

                  //

                  // label9

                  //

                  this.label9.Location = new System.Drawing.Point(232, 160);

                  this.label9.Name = "label9";

                  this.label9.Size = new System.Drawing.Size(56, 16);

                  this.label9.TabIndex = 6;

                  this.label9.Text = "remaining";

                  //

                  // WebScraper (the form itself: fixed-size border, all controls added in one call)

                  //

                  this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);

                  this.ClientSize = new System.Drawing.Size(296, 302);

                  this.Controls.AddRange(new System.Windows.Forms.Control[] {

                                                                                                              this.CurrentRecordDisplay,

                                                                                                              this.groupBox1,

                                                                                                              this.progressBar1,

                                                                                                              this.CurrentTickerDisplay,

                                                                                                              this.performanceMsg,

                                                                                                              this.startButton,

                                                                                                              this.label2,

                                                                                                              this.TotalRecordsDisplay,

                                                                                                              this.label1,

                                                                                                              this.TotalRecoveryDisplay,

                                                                                                              this.label3,

                                                                                                              this.SuccessPercentage,

                                                                                                              this.label5,

                                                                                                              this.label4,

                                                                                                              this.SecondsPerRecord,

                                                                                                              this.label6,

                                                                                                              this.ElapsedTimeDisplay,

                                                                                                              this.RemainingTimeDisplay,

                                                                                                              this.label9,

                                                                                                              this.stopButton,

                                                                                                              this.exitButton});

                  this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.Fixed3D;

                  this.Name = "WebScraper";

                  this.Text = "WebScraper";

                  this.groupBox1.ResumeLayout(false);

                  this.ResumeLayout(false);

            }

            #endregion

            /// <summary>

            /// Method to throw an exception if the page retrieval times out.  This prevents the system

            /// from waiting indefinitely for a page return.  This logic could be extended to retry page

            /// requests a finite number of times before timing out

            /// </summary>

            /// <param name="sender"></param>

            /// <param name="e"></param>

            private void timer1_Tick(object sender, System.EventArgs e)

            {

                  throw new Exception("timed out");

            }

            /// <summary>

            /// User interface support routine that controls enabling of the start button.  It will

            /// enable the start button whenever one or more financial statements are selected for

            /// retrieval

            /// </summary>

            /// <param name="sender">Control that changed</param>

            /// <param name="e">Control event information, ignored</param>

            private void GetFinancialStatement_CheckedChanged(object sender, System.EventArgs e)

            {

                  if(((CheckBox) sender).Checked)

                        ++m_numStatementsToScrape;

                  else

                        --m_numStatementsToScrape;

                  if(m_numStatementsToScrape > 0)

                        this.startButton.Enabled = true;

                  else

                        this.startButton.Enabled = false;

            }

            /// <summary>

            /// Read in the set of ticker symbols from a file

            /// </summary>

            /// <param name="theFile">The name of the file containing the ticker symbols</param>

            /// <returns>An ArrayList of strings representing the ticker symbols</returns>

            private ArrayList loadTickerSymbols(string theFile)

            {

                  // get a filestream for pulling in the ticker data

                  FileStream tickerData = File.Open("Tickers.txt",FileMode.Open,FileAccess.Read);

                  // initialize the ticker symbol array

                  ArrayList syms  = new ArrayList();

                  // get a stream reader to suck the data in from

                  StreamReader sr = new StreamReader(tickerData);

                  string theLine;

                  // read in the set of ticker symbols, one per line. Use

                  // the trim function to drop any whitespace trying to

                  // sneak in

                  while((theLine = sr.ReadLine()) != null)

                  {

                        string s = theLine.Trim();

                        // make sure we don't put any blank lines into the array, that would

                        // break various future assumptions

                        if(s.Length > 0)

                              syms.Add(s);

                  }

                  return syms;

            }

            /// <summary>

            /// Randomize the ordering of the ticker symbols.  This addresses the fact that

            /// some sites may be suspicious of large numbers of queries coming in from a

            /// single IP address in sorted order.  They may also be suspicious of large numbers

            /// of requests from a single IP in any order, but thats another issue that this

            /// code does not deal with.

            /// </summary>

            /// <param name="sourceList"></param>

            /// <returns></returns>

            private ArrayList randomizeList(ArrayList sourceList)

            {

                  // create a new list to receive the randomized entries

                  ArrayList reorderedList = new ArrayList();

                  // fire up a random number generator

                  Random r = new Random();

                  // iterate over the collection of items

                  while(sourceList.Count > 0)

                  {

                        if(sourceList.Count == 1)

                        {

                              reorderedList.Add(sourceList[0]);

                              // we could force the while to terminate appropriately, but it's

                              // easier just to bail out

                              break;

                        }

                        else

                        {

                              // this is somewhat sloppy, because we're forcing a large number

                              // of list reallocations, but it makes the logic pretty straightforward.

                              // get a random value between 0 and the number of elements remaining to be randomized

                              int i = r.Next(sourceList.Count);

                              // use that value to pull a value out of the ordered list and stick it in the

                              // randomized list

                              reorderedList.Add(sourceList[i]);

                              // strip the moved item from the source list so we can use our simpleminded

                              // random index approach

                              sourceList.RemoveAt(i);

                              sourceList.TrimToSize();

                        }

                  }

                  return reorderedList;

            }

            static void Main()

            {

                  // This combined conditional build and exception handler is a technique I saw

                  // and like very much.  If you're debugging, it just blows up in the debugger, but

                  // if you're running a retail version, uncaught/unhandled errors get snagged and

                  // something reasonable appears

#if DEBUG

                  Application.Run(new WebScraper());

#else

                  try

                  {

                        Application.Run(new WebScraper());

                  }

                  catch (Exception e)

                  {

                        MessageBox.Show("Something unexpected has happened. Please email " + ProgramSupportEmail + " with this information. \n\n\n" + e.ToString());

                  }

#endif

            }

            /// <summary>

            /// Handle user stop operations.  Dispatches to internal stopScraper function as this isn't the

            /// only way in which the scraper may be stopped

            /// </summary>

            /// <param name="sender"></param>

            /// <param name="e"></param>

            private void stopButton_Click(object sender, System.EventArgs e)

            {

                  stopScraper();

            }

            private void stopScraper()

            {

                  MarketDBMS.Close();

                  this.performanceMsg.Text = "";

                  m_scraperOperatingMode = ScraperOperatingMode.Stopped;

                  stopButton.Enabled = false;

                  startButton.Text = "Start";

                  startButton.Enabled = (

                        this.GetBalanceSheet.Checked ||

                        this.GetCashFlow.Checked ||

                        this.GetEquityInfo.Checked ||

                        this.GetIncomeStatement.Checked);

            }

            private void exitButton_Click(object sender, System.EventArgs e)

            {

                  Application.Exit();

            }

      }

      /// <summary>

      /// Data structure that carries information neccessary to parse an individual datum recovered

      /// from a scraped web page

      /// </summary>

      public class ItemParseDirective

      {

            public ItemParseDirective(WebScraper.FinancialStatements rt,string k,string p,WebScraper.ScrapeDataType dt,int nc)

            {

                  m_recordType = rt;

                  m_key = k;

                  m_pattern = p;

                  m_dataType = dt;

                  m_numColumns = nc;

            }

            // statement

            public WebScraper.FinancialStatements m_recordType;

            // column

            public int m_recordID;

            // variable key

            public string m_key;

            // parse string

            public string m_pattern;

            // information type

            public WebScraper.ScrapeDataType m_dataType;

            // how many columns of data

            public int m_numColumns;

      }

}