Freelance Project Requests info@CodeDocu.de Software Development in C# WPF Asp.Net Core Vba Excel Word SQL-Server EF Linq, UWP Net
#

 

 

Task:

The application scours through a full range of Ebay Classifieds.

The application is written in C #, WPF.

 

Base:

Ebay Classifieds does not filter the results for fulltext input, it only displays the results

which have the search terms in the title or in the keywords.

This makes an effective search pointless.

For example, you can not search for XTR circuits or the term NP for new price in the display

 

SQL program under Windows 10

 

 

SQL database

The values ​​are stored in a local database

Collecting several thousand records is no problem here.

 

 

 

C # Code Listing

 

 

 

 

Using Namespaces

using System;

using System.Collections.Generic;

using System.Text;

using System.Threading.Tasks;

using System.Windows;

//< add using >

using System.IO;                //*Stream, Streamreader

using System.Net;               //*NetRequest

using HtmlAgilityPack;          //*Html Website

using System.Data.SqlClient;    //*local DB

using System.Data;              //*ConnectionState, DataTable

using System.Windows.Threading;

//</ add using >

 

 

 

 

namespace WebRobot_Kleinanzeigen

{

 

    public partial class MainWindow : Window

    {

 

 

 

 

 

 

        #region Form

        //< refresh delegate >

        private delegate void EmptyDelegate();

        //</ refresh delegate >

 

        //--------------------< region: Form >---------------------

        public MainWindow()

        {

            InitializeComponent();

            optStop.IsChecked = false;

        }

        //--------------------</ region: Form >---------------------

        #endregion /Form

 

 

Region Buttons

 

        #region Buttons

        //--------------------< region: Buttons >---------------------

        private void Button_Start_Click(object sender, RoutedEventArgs e)

        {

            //--------< Button_Start_Click() >--------

            optStop.IsChecked = false;

            Scrape_all_Lists();

            //--------</ Button_Start_Click() >--------

        }

 

 

        private void Button_Details_Click(object sender, RoutedEventArgs e)

        {

            //--------< Button_Details_Click() >--------

            optStop.IsChecked = false;

            Scrape_all_Details();

            //--------</ Button_Details_Click() >--------

        }

        //--------------------</ region: Buttons >---------------------

        #endregion /Buttons

 

 

Collect list

In webrobotern you always have to first capture a list and then work out the details.

Here's collecting data from the list

#region Methods Scan

        //--------------------< region: Methods Scan >---------------------

        private void Scrape_all_Lists()

        {

            //--------< Scrape_all_Lists() >--------

            fx_Log("----< Read Lists >----");

            Textbox_URL.Text = "https://www.ebay-kleinanzeigen.de/s-fahrraeder/damen/c217+fahrraeder.art_s:damen";

            while (Textbox_URL.Text != "")

            {

                if (optStop.IsChecked == true) return;

                Scrape_List();

            }

            fx_Log("----</ Read Lists >----");

            //--------</ Scrape_all_Lists() >--------

        }

 

        private void Scrape_List()

        {

            //--------< Scrape_List() >--------

            //*freiberuflich https://www.ebay-kleinanzeigen.de/s-fahrraeder/damen/c217+fahrraeder.art_s:damen

            //*weiter mit span class=next /de/jobs/temporary/?job_types=temporary&page=2

            string sURL = Textbox_URL.Text;

            Uri baseUrl = new Uri(sURL);

 

            if (optStop.IsChecked == true) return;

 

            string sPage = baseUrl.Query;

            fx_Log("--< " + sPage + " >--");

 

            HtmlDocument doc = Web_Get_HtmlDocument(sURL);

            if (doc == null) return;

 

            //< node01 >

            HtmlNode node_Area = doc.DocumentNode.SelectSingleNode("//ul[@id=\"srchrslt-adtable\"]"); //*find subnode with

            //</ node01 >

            if (node_Area != null)

            {

                //< nodes >

                HtmlNodeCollection nodes = node_Area.SelectNodes("//h2[@class=\"text-module-begin\"]"); //*find subnode with

                //< nodes >

 

                //------< @Loop: Detail Nodes >------

                foreach (HtmlNode node in nodes)

                {

                    //----< In Detail-Node >----

                    if (optStop.IsChecked == true) return;

 

                    //--< get a_href >--

                    HtmlNode node_to_Detail = node.SelectSingleNode("a");

                    string sTitle = node_to_Detail.InnerText;

                    string sURL_Detail_relative = node_to_Detail.GetAttributeValue("href", "");

                    string sURL_Detail_absolute = new Uri(baseUrl, sURL_Detail_relative).AbsoluteUri;

 

                    db_Update_Add_ListRecord(sURL_Detail_absolute, sTitle);

 

                    //< print >

                    fx_Log(sTitle);

                    //</ print >

                    //--</ get a_href >--

                    //----</ In Detail-Node >----

                }

                //------</ @Loop: Detail Nodes >------

 

                fx_Log("--</ " + sPage + " >--");

                fx_Log("");

 

                HtmlNode node_Next = doc.DocumentNode.SelectSingleNode("//a[@class=\"pagination-next\"]");

                if (node_Next != null)

                {

                    string sURL_Next_relative = node_Next.GetAttributeValue("href", "");

                    string sURL_Next = new Uri(baseUrl, sURL_Next_relative).AbsoluteUri;

                    Textbox_URL.Text = sURL_Next;

                }

                else

                {

                    Textbox_URL.Text = "";

                }                                                                                              //</ nodes >

            }

 

 

            //--------</ Scrape_List() >--------

        }

 

 

Evaluation of all details

Subsequently, all individual websites are called up and the texts collected

#region Methods Scan

        ..

private void Scrape_all_Details()

        {

            //--------< Scrape_all_Details() >--------

            fx_Log("----< @Read Details >----");

 

            //----< @Loop: Alle Empty Records >----

            while (1 == 1)

            {

                //----< Detail >----

                if (optStop.IsChecked == true) return;

 

                //< find record >

                string sSQL = "SELECT TOP 1 [IDDetail] FROM tbl_Details WHERE [dtScan] IS NULL";

                DataRow row = clsDB.Get_DataRow(sSQL);

                //</ find record >

 

                if (row != null)

                {

                    Scrape_Detail(Convert.ToInt32(row["IDDetail"]));

                }

                else

                {

                    break;

                }

                //----</ Detail >----

            }

            //----</ @Loop: Alle Empty Records >----

 

            fx_Log("----</ @Read Details >----");

            //--------</ Scrape_all_Details() >--------

        }

 

        private void Scrape_Detail(int ID)

        {

            //--------< Scrape_Detail() >--------

            if (optStop.IsChecked == true) return;

 

            //fx_Log("--< Read Detail >--");

            fx_Log("Detail=" + ID);

 

            string sURL = clsDB.Get_Value_as_String("URL", "tbl_Details", "[IDDetail]=" + ID);

            HtmlDocument doc = Web_Get_HtmlDocument(sURL);

            //< check >

            if (doc == null)

            {

                //< update >

                string sql_Error = "UPDATE tbl_Details SET [dtScan] = SYSDATETIME() WHERE IDDetail = " + ID;

                clsDB.Execute_SQL(sql_Error);

                //</ update >

                return;

            }

            //</ check >

 

            //----< In Detail-Node >----

            //< Text >

            string sText = "";

            HtmlNode nodeText = doc.DocumentNode.SelectSingleNode("//p[@id=\"viewad-description-text\"]");

            if (nodeText != null) {

                sText = nodeText.InnerText;

                sText = sText.Trim();

            }

            //</ Text >

 

            //--< Check deleted >--

            if (sText=="")

            {

                HtmlNode nodeWarning = doc.DocumentNode.SelectSingleNode("//div[@class=\"outcomemessage-warning\"]"); //class outcomemessage-warning, ID=srchrslt-adexpired

                if (nodeWarning == null )

                {

                    return; //abbruch ohne Text

                }

                else

                {

                    //expired, deleted

                    //< delete >

                    string sql_Error = "DELETE FROM tbl_Details WHERE IDDetail = " + ID;

                    clsDB.Execute_SQL(sql_Error);

                    //</ delete >

                    return;

                }

            }

            //--</ Check deleted >--

 

            if (sText.Length>50)

            {

                fx_Log("text=" + sText.Substring(0, 50));

            }

            else

            {

                fx_Log("text=" + sText);

            }

           

 

            //< Preis >

            string sPreis = "";

            int intPreis = 0;

            HtmlNode nodePreis = doc.DocumentNode.SelectSingleNode("//h2[@id=\"viewad-price\"]");

            if (nodePreis != null)

            {

                sPreis = nodePreis.InnerText;

                sPreis = sPreis.Replace("Preis:","");

                sPreis = sPreis.Replace("€", "");

                sPreis = sPreis.Replace("VB", "");

                sPreis = sPreis.Replace(".", "");

                sPreis = sPreis.Replace("Zu verschenken", "");

                sPreis = sPreis.Trim();

               

                if(sPreis=="")

                {

                    intPreis = 0;

                }

                else

                {

                    intPreis = Convert.ToInt32(sPreis);

                }

               

            }

            //</ Preis >

 

            //--< Special >--

            string sErstellungsdatum = html_GetText_Kleinanzeigen_Info_Text(doc, "Erstellungsdatum:");  //19.02.2018

            string sSchlagwoerter = html_GetText_Kleinanzeigen_Info_Links (doc, "Schlagwörter:");

            string sArt = html_GetText_Kleinanzeigen_Info_Links(doc, "Art:");

            //--</ Special >--

 

            //< correct >

            sText = clsCheck.correct_String(sText);

            sErstellungsdatum = clsCheck.correct_String(sErstellungsdatum );

            sSchlagwoerter  = clsCheck.correct_String(sSchlagwoerter );

            sArt = clsCheck.correct_String(sArt);

            //</ correct >

 

            //Date to Iso

            //*yyyy-mm-dd

            string sDate_ISO = sErstellungsdatum.Substring(6, 4) + "-" + sErstellungsdatum.Substring(3, 2) + "-" + sErstellungsdatum.Substring(0, 2);

 

            //< update >

            string sql_Update = "UPDATE tbl_Details ";

            sql_Update += Environment.NewLine + " SET [Text]='" + sText + "',    [dtScan] = SYSDATETIME()";

            sql_Update += Environment.NewLine + " ,[Schlagwoerter]='" + sSchlagwoerter  + "',[Art]='" + sArt + "'";

            sql_Update += Environment.NewLine + " ,[Erstellungsdatum]=CAST('" + sDate_ISO + "' AS DATETIME)";

            sql_Update += Environment.NewLine + " ,[Preis]=" + intPreis ;

            //in: 19.02.2018

            //out: CAST('2009 - 05 - 25' AS DATETIME)'

            sql_Update += " WHERE IDDetail = " + ID;

            clsDB.Execute_SQL(sql_Update);

            //</ update >

 

            //----</ In Detail-Node >----

 

 

            //----< Images >----

 

            //--< Image-Area >--

            HtmlNode node_Image_Area = doc.DocumentNode.SelectSingleNode("//div[@id=\"viewad-images\"]");

            if (node_Image_Area != null)

            {

 

                HtmlNodeCollection image_Nodes = node_Image_Area.SelectNodes("//img");

                foreach(HtmlNode imgNode in image_Nodes )

                {

                    string sImageURL = imgNode.GetAttributeValue("src", "");

                    if (sImageURL.Contains("/common/")==false )

                        {

                        //sImageURL = sImageURL.Replace("/", "\/");

 

                        //< add >

                        string sql_Add = "INSERT INTO tbl_Images ([IDDetail],[URL]) VALUES(" + ID + " , '" + sImageURL + "')";

                        clsDB.Execute_SQL(sql_Add);

                            //</ add >

                    }

                }

 

 

            }

            //--</ Image-Area >--

 

            //----</ Images >----

 

 

            //fx_Log("--</ Read Detail >--");

            fx_Log("/Detail=" + ID);

            //--------</ Scrape_Detail() >--------

        }

        //--------------------< region: Methods Scan >---------------------

        #endregion Methods Scan

 

 

 

Larger tasks are summarized in methods.

 

Create HTML document from Web URL

Determine texts from specific areas

Collect links

 

      #region Methods HTML

        //--------------------< region: Methods >---------------------

        private HtmlAgilityPack.HtmlDocument Web_Get_HtmlDocument(string sURL)

        {

            //------------< fx_read_Page() >------------

            //* get the HTML Document of a website-URL     

            try

            {

                //-< init >-

                //< WebRequest and Response >

                WebRequest objRequest = WebRequest.Create(sURL);

                HttpWebResponse objResponse = (HttpWebResponse)objRequest.GetResponse();

                //</ WebRequest and Response >

 

                //< Stream and Reader >

                Stream objDataStream = objResponse.GetResponseStream();

                StreamReader TextReader = new StreamReader(objDataStream);

                //</ Stream and Reader >

                //-</ init >-

 

                //< download >

                //* Read Website to local String

                string sHTML = TextReader.ReadToEnd();

                //</ download >

 

                //< get HTMLdocument >

                //*create and load to local HtmlDocument

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(sHTML);

                //</ get HTMLdocument >

 

                //< output >

                return doc;

                //</ output >

            }

            catch (Exception)

            {

                return null;

            }

 

            //------------</ fx_read_Page() >------------

        }

 

        private string html_GetText_by_Class_Filter(HtmlDocument doc, string par_Html_Element_Type,string par_Class_ID, string parFilter)

        {

            string sReturn = "";

            //< Text >

            HtmlNode nodeText = doc.DocumentNode.SelectSingleNode("//" + par_Html_Element_Type + "[@" + par_Class_ID + "=\"" + parFilter + "\"]");

            if (nodeText != null) { sReturn = nodeText.InnerText; }

            //</ Text >

 

 

            sReturn.Trim();

            sReturn.TrimStart();

            return sReturn;

        }

 

        private HtmlNode  html_GetNode_by_Class_Filter_Innertext(HtmlDocument doc, string par_Html_Element_Type, string par_Class_ID, string parFilter,string innerText_Filter)

        {

            HtmlNode return_Node = null;

            //< Text >

            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//" + par_Html_Element_Type + "[@" + par_Class_ID + "=\"" + parFilter + "\"]");

            if (nodes != null)

            {

                foreach (HtmlNode node in nodes)

                {

                    if (node.InnerText == innerText_Filter)

                    {

                        return_Node = node;

                        break;

                    }

                   

                }

            }

            //</ Text >

            return return_Node;

        }

 

        private string html_GetText_Kleinanzeigen_Info_Links(HtmlDocument doc,  string sFilter_Inner)

        {

            string sReturn = "";

            HtmlNode node_Titel = html_GetNode_by_Class_Filter_Innertext(doc, "dt", "class", "attributelist--key", sFilter_Inner);

            if (node_Titel != null)

            {

                HtmlNode node0 = node_Titel.NextSibling;

                if (node0 != null)

                {

                    HtmlNode node = node0.NextSibling;

                    if (node != null)

                    {

                        HtmlNodeCollection subLinks = node.SelectNodes(".//a");

                         foreach (HtmlNode subLink in subLinks)

                        {

                            string sTreffer  =subLink.InnerText;

                            sTreffer = sTreffer.Replace("\n","") ;

                            sTreffer=sTreffer.Trim();

                            sReturn += " " + sTreffer;

                        }

                       

                    }

                }

            }

            sReturn = sReturn.Trim();

            return sReturn;

        }

 

        private string html_GetText_Kleinanzeigen_Info_Text(HtmlDocument doc, string sFilter_Inner)

        {

            string sReturn = "";

            HtmlNode node_Titel = html_GetNode_by_Class_Filter_Innertext(doc, "dt", "class", "attributelist--key", sFilter_Inner);

            if (node_Titel != null)

            {

                HtmlNode node0 = node_Titel.NextSibling;

                if (node0 != null)

                {

                    HtmlNode node = node0.NextSibling;

                    if (node != null)

                    {

                        sReturn += node.InnerText;                       

                    }

                }

            }

            return sReturn;

        }

 

        private string html_GetText_OuterGroup_by_InnerFilter(HtmlDocument doc, string par_Html_Element_Type, string sFilter_Inner)

        {

            string sReturn = "";

 

            HtmlNode node_Inner = doc.DocumentNode.SelectSingleNode("//" + par_Html_Element_Type + "[text() = '" + sFilter_Inner + "']");

            if (node_Inner != null)

            {

                HtmlNode parent = node_Inner.ParentNode;

                node_Inner.Remove();

                sReturn = parent.InnerText;

                sReturn.Trim();

                sReturn.TrimStart();

            }

            return sReturn;

        }

        //--------------------</ region: Methods >---------------------

        #endregion /Methods Html

 

 

 

        #region Methods Data

        //--------------------< region: Methods Data >---------------------

        private void db_Update_Add_ListRecord(string sURL, string sTitle)

        {

            //--------< db_Update_Add_Record() >--------

            //*Update or add Record

            //< correct>

            sURL = sURL.Replace("'", "''");

            sTitle = sTitle.Replace("'", "''");

            //</ correct>

 

            //< find record >

            string sSQL = "SELECT TOP 1 * FROM tbl_Details WHERE [URL] Like '" + sURL + "'";

            DataTable tbl = clsDB.Get_DataTable(sSQL);

            //</ find record >

 

            if (tbl.Rows.Count == 0)

            {

                //< add >

                string sql_Add = "INSERT INTO tbl_Details ([URL],[Title],[dtList]) VALUES('" + sURL + "','" + sTitle + "',SYSDATETIME())";

                clsDB.Execute_SQL(sql_Add);

                //</ add >

            }

            else

            {

                //< update >

                string ID = tbl.Rows[0]["IDDetail"].ToString();

                string sql_Update = "UPDATE tbl_Details SET [dtList] = SYSDATETIME() WHERE IDDetail = " + ID;

                clsDB.Execute_SQL(sql_Update);

                //</ update >

            }

            //--------</ db_Update_Add_Record() >--------

        }

 

        //--------------------</ region: Methods Data >---------------------

        #endregion /Methods Data

 

        #region Sys

        //--------------------< region: Sys >---------------------

        private void fx_Log(string sLog)

        {

            //------------< fx_Log() >------------

            //* log Text to Textbox

            string sText = Textbox_Log.Text;

            sText = DateTime.Now + " " + sLog + Environment.NewLine + sText;

            if (sText.Length > 50000) { sText = sText.Substring(50000); }

            Textbox_Log.Text = sText;

            Textbox_Log.UpdateLayout();

            //< refresh >

            DoEvents();

            //UpdateLayout();

            //</ refresh >

            //------------</ fx_Log() >------------

        }

 

        protected void DoEvents()

        {

            //----< DoEvents() >----

            //* Diese Funktion uebernimmt die Unterbrechnung zur Anzeige und Eventbearbeitung in C#, WPF beim langen Loop Berechnungen

            //* mit einer Dispatcher

            //* EmptyDelegate im Header definieren

            //* using System.Windows.Threading; im Header festlegen

 

            Dispatcher.CurrentDispatcher.Invoke(DispatcherPriority.Background, new EmptyDelegate(delegate { }));

            //----</ DoEvents() >----

        }

        //--------------------</ region: Sys >---------------------

        #endregion /Sys

 

 

<Window x:Class="WebRobot_Kleinanzeigen.MainWindow"

        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"

        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"

        xmlns:d="http://schemas.microsoft.com/expression/blend/2008"

        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"

        xmlns:local="clr-namespace:WebRobot_Kleinanzeigen"

        mc:Ignorable="d"

        Title="MainWindow" Height="550" Width="720">

    <Grid>

        <Button x:Name="Button_Start" Content="Start" HorizontalAlignment="Left" Margin="29,10,0,0" VerticalAlignment="Top" Width="75"

                Click="Button_Start_Click"                >

        </Button>

        <Label x:Name="Label_Button_Start" Content="read Lists" HorizontalAlignment="Left" VerticalAlignment="Top" Margin="117,11,0,0"/>

        <Button x:Name="Button_Details" Content="Details" HorizontalAlignment="Left" Margin="30,34,0,0" VerticalAlignment="Top" Width="75"

                Click="Button_Details_Click"

                />

        <Label x:Name="Label_Button_Details" Content="read all Details" HorizontalAlignment="Left" VerticalAlignment="Top" Margin="117,31,0,0"/>

        <RadioButton x:Name="optStop" IsChecked="False"  Content="Stop" HorizontalAlignment="Left" Margin="335,22,0,0" VerticalAlignment="Top"/>

 

        <Label x:Name="Label_URL" Content="Url:" HorizontalAlignment="Left" Margin="30,46,0,0" VerticalAlignment="Top"/>

        <TextBox x:Name="Textbox_URL" HorizontalAlignment="Left" Height="23" Margin="30,72,0,0" TextWrapping="Wrap"

                 Text="" VerticalAlignment="Top" Width="658"/>

 

        <Label x:Name="Label_Log" Content="log:" HorizontalAlignment="Left" Margin="29,100,0,0" VerticalAlignment="Top"/>

        <TextBox x:Name="Textbox_Log" HorizontalAlignment="Left" Height="385" Margin="29,126,0,0" TextWrapping="Wrap" Text=".." VerticalAlignment="Top" Width="659"/>

       

 

 

    </Grid>

</Window>

 

 

 

Deposited database

CREATE TABLE [dbo].[tbl_Details] (

    [IDDetail]         INT            IDENTITY (1, 1) NOT NULL,

    [URL]              NVARCHAR (255) NULL,

    [Title]            NVARCHAR (255) NULL,

    [Text]             NVARCHAR (MAX) NULL,

    [dtScan]           DATETIME       NULL,

    [Schlagwoerter]    NVARCHAR (255) NULL,

    [Art]              NVARCHAR (255) NULL,

    [Erstellungsdatum] DATETIME       NULL,

    [Preis]            INT            NULL,

    [dtList]           DATETIME       NULL,

    PRIMARY KEY CLUSTERED ([IDDetail] ASC)

);

 

 

 

 

And for pictures

CREATE TABLE [dbo].[tbl_Images] (

    [IDImage]  INT            IDENTITY (1, 1) NOT NULL,

    [IDDetail] INT            NULL,

    [URL]      NVARCHAR (255) NULL,

    PRIMARY KEY CLUSTERED ([IDImage] ASC)

);

 

 

Mobile
»
WPF: Drag Drop Example
»
Read text from PDF using iTextSharp
»
Send emails with WPF
»
C # Application: Webrobot to read all Ebay Kleinanzeigen
»
WPF: Navigate to a page
»
WPF: create a selectable list
»
WPF: Set Startup Window
»
Add WPF Local SQL Database to Application and Connect Data
»
WPF / UWP: Canvas with relative positions and sizes
»
C #, WPF, UWP: ScreenShot to save an app or window area as a file

.

Contact for Jobs, Project Requests: raimund.popp@microsoft-programmierer.de