HtmlAgilityPack Help

inkedGFX

Well-known member
Joined
Feb 2, 2013
Messages
142
Programming Experience
Beginner
I am struggling to parse a web page using HtmlAgilityPack. What I am trying to do is pull the auction links from each results page; there are 33 pages with 25 links on each. I can get this far. My problem is that I also need the auction title and description from each of the 25 auctions on each of the 33 pages, which is easier said than done. I can show the code that works up to the point where I get all the links, but then I get a null reference exception when I try to get the title and description. This is one of the pages I need the title and description from:

AutoTrader Classics - 1967 Mercedes-Benz 600 | Import Classics | Burbank, CA

If you view the page source, you will see that it isn't formatted very well. Below is the code that grabs all the links to the individual auction pages:

 /// <summary>
        /// Reads the result-page URLs from AutoTraderClassics_AuctionLinks.txt in the
        /// application's base directory, downloads every page and collects the auction
        /// links found on it into <c>viewCarLinks</c>.
        /// </summary>
        /// <param name="lstUrls">List box that is bound to the result-page URLs.</param>
        /// <param name="lstCarLinks">List box that is bound to the collected auction links.</param>
        /// <param name="Auction">Not used by this method; kept so existing callers still compile.</param>
        public void Parse_AutoTrader(ListBox lstUrls, ListBox lstCarLinks, ListBox Auction)
        {
            // Path.Combine avoids a doubled separator when BaseDirectory already ends with one.
            string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "AutoTraderClassics_AuctionLinks.txt");

            // Start from an empty list (as Parse_IdealClassicCars does) so that calling
            // this method a second time does not append every URL again.
            urlList.Clear();

            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Blank lines are not URLs and would make HtmlWeb.Load fail.
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    urlList.Add(line.Trim());
                }
            }
            lstUrls.ItemsSource = urlList;

            // Keep the original default of highlighting the first URL when nothing is selected.
            if (lstUrls.SelectedIndex == -1 && urlList.Count > 0)
            {
                lstUrls.SelectedIndex = 0;
            }

            // NOTE(review): viewCarLinks is not cleared here because its other uses are
            // not visible from this method - confirm whether it should be reset as well.
            HtmlWeb mainWeb = new HtmlWeb();
            int index = 0;

            foreach (string item in urlList)
            {
                index++;
                viewCarLinks.Add("Page " + index);

                // Download the URL being iterated. The previous code loaded
                // lstUrls.SelectedItem, so the page fetched depended on the UI selection
                // (wrong start page if something was selected, null once the index ran
                // past the last entry).
                var page = mainWeb.Load(item);

                // SelectNodes returns null - not an empty collection - when nothing matches.
                HtmlNodeCollection linkNodes = page.DocumentNode.SelectNodes("//div[@class='column col2 ']/h2[@class='title']/a");
                if (linkNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode node in linkNodes)
                {
                    // GetAttributeValue returns the default instead of throwing when
                    // the anchor has no href attribute.
                    string link = node.GetAttributeValue("href", string.Empty);
                    if (link.Length == 0)
                    {
                        continue;
                    }

                    viewCarLinks.Add(link);
                }
            }

            lstCarLinks.ItemsSource = viewCarLinks;
        }


By the way, the code snippet below adds the same data to every line of the list box. Any idea why?

  /// <summary>
        /// Downloads the Ideal Classic Cars inventory page, collects the link of every
        /// anchor found in a table cell into <c>urlList</c>, and adds one
        /// <c>Auction</c> (description, model) per matching table row to <c>auctionList</c>.
        /// </summary>
        /// <param name="lstUrls">List box that is bound to the collected links.</param>
        /// <param name="lstCarLinks">Not used by this method; kept so existing callers still compile.</param>
        /// <param name="auctions">List view that is bound to the collected auctions.</param>
        public void Parse_IdealClassicCars(ListBox lstUrls, ListBox lstCarLinks, ListView auctions)
        {
            urlList.Clear();
            string path = "http://www.idealclassiccars.net/inventory";

            HtmlWeb idealWeb = new HtmlWeb();
            HtmlDocument page = idealWeb.Load(path);

            // SelectNodes returns null - not an empty collection - when nothing matches,
            // so both results are checked before they are enumerated.
            HtmlNodeCollection auctionNode = page.DocumentNode.SelectNodes("//table/tr/td/a");
            // NOTE(review): this only matches rows whose class is exactly 'row row1';
            // confirm whether the page also uses other row classes that should be included.
            HtmlNodeCollection auctionItem = page.DocumentNode.SelectNodes("//table/tr[@class='row row1']");

            if (auctionNode != null)
            {
                foreach (HtmlNode node in auctionNode)
                {
                    // GetAttributeValue returns the default instead of throwing when
                    // the anchor has no href attribute.
                    string href = node.GetAttributeValue("href", string.Empty);
                    if (href.Length > 0)
                    {
                        urlList.Add(href);
                    }
                }
            }

            if (auctionItem != null)
            {
                foreach (HtmlNode item in auctionItem)
                {
                    // Query relative to the current row (".//"). An XPath starting with
                    // "//" is evaluated from the document root even when called on a
                    // child node, which is why every entry used to contain the data of
                    // the first row.
                    HtmlNode descCell = item.SelectSingleNode(".//td[@itemprop='description']");
                    HtmlNode modelCell = item.SelectSingleNode(".//td[@itemprop='model']");

                    // A row with neither cell carries no auction data.
                    if (descCell == null && modelCell == null)
                    {
                        continue;
                    }

                    string desc = descCell != null ? descCell.InnerText : string.Empty;
                    string model = modelCell != null ? modelCell.InnerText : string.Empty;

                    auctionList.Add(new Auction(desc, model));
                }
            }

            lstUrls.ItemsSource = urlList;
            auctions.ItemsSource = auctionList;
        }


Like I said before, this works. Any help would be appreciated.

Thank You
InkedGFX
 
Last edited:
Top Bottom