diff --git a/Data-Extraction/Getting-Started/ASP.NETCore/Recognize_Forms/Recognize_Forms/Data/Input.pdf b/Data-Extraction/Getting-Started/ASP.NETCore/Recognize_Forms/Recognize_Forms/Data/Input.pdf index 839bbb72..80f58e30 100644 Binary files a/Data-Extraction/Getting-Started/ASP.NETCore/Recognize_Forms/Recognize_Forms/Data/Input.pdf and b/Data-Extraction/Getting-Started/ASP.NETCore/Recognize_Forms/Recognize_Forms/Data/Input.pdf differ diff --git a/Data-Extraction/Getting-Started/Console/.NETFramework/Recognize_Forms/Recognize_Forms/Data/Input.pdf b/Data-Extraction/Getting-Started/Console/.NETFramework/Recognize_Forms/Recognize_Forms/Data/Input.pdf index 839bbb72..80f58e30 100644 Binary files a/Data-Extraction/Getting-Started/Console/.NETFramework/Recognize_Forms/Recognize_Forms/Data/Input.pdf and b/Data-Extraction/Getting-Started/Console/.NETFramework/Recognize_Forms/Recognize_Forms/Data/Input.pdf differ diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-JSON-from-PDF/.NET/Extract-data-as-JSON-from-PDF-document/README.md b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-JSON-from-PDF/.NET/Extract-data-as-JSON-from-PDF-document/README.md new file mode 100644 index 00000000..4c2192e3 --- /dev/null +++ b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-JSON-from-PDF/.NET/Extract-data-as-JSON-from-PDF-document/README.md @@ -0,0 +1,37 @@ +# Extract Structured Data from PDF + +The Syncfusion® [Smart Data Extractor](https://www.syncfusion.com/document-sdk/net-pdf-data-extraction) is a .NET library used to extract document structures such as hierarchies, text blocks, images, headers, and footers from PDFs and scanned images by analyzing visual layout patterns like lines, boxes, and alignment. It returns structured JSON with per-field confidence scores. + +## Steps to Extract Structured Data from PDF Files + +Step 1: **Create a new project:** Begin by setting up a new C# Console Application project. 
+ +Step 2: **Install the NuGet package:** Add the [Syncfusion.SmartDataExtractor.Net.Core](https://www.nuget.org/packages/Syncfusion.SmartDataExtractor.Net.Core) package to your project from [NuGet.org](https://www.nuget.org/). + +Step 3: **Include necessary namespaces:** Add these namespaces in your Program.cs file: + +```csharp +using System.IO; +using System.Text; +using Syncfusion.SmartDataExtractor; + +``` + +Step 4: Add the following code snippet in Program.cs file to extract data from PDF. + +```csharp +// Open the input PDF file as a read-only stream (the file is never modified). +using (FileStream stream = new FileStream(Path.GetFullPath("Input.pdf"), FileMode.Open, FileAccess.Read)) +{ + // Initialize the Smart Data Extractor. + DataExtractor extractor = new DataExtractor(); + // Extract the structured document data as a JSON string. + string data = extractor.ExtractDataAsJson(stream); + // Save the extracted JSON data into an output file, encoded as UTF-8. + File.WriteAllText(Path.GetFullPath(@"Output.json"), data, Encoding.UTF8); +} + +``` +For a complete working example, download it from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/Data-Extraction/Smart-Data-Extractor/Extract-data-as-JSON-from-PDF/.NET). + +More information about extracting data from PDF can be found in this [documentation](https://help.syncfusion.com/document-processing/data-extraction/smart-data-extractor/overview) section. 
\ No newline at end of file diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF.sln b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF.sln new file mode 100644 index 00000000..7294b79a --- /dev/null +++ b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 18 +VisualStudioVersion = 18.5.11716.220 stable +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Extract-data-as-MD-from-PDF", "Extract-data-as-MD-from-PDF\Extract-data-as-MD-from-PDF.csproj", "{29872D0F-18F6-6AB7-0892-D538C0E179BA}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {29872D0F-18F6-6AB7-0892-D538C0E179BA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {29872D0F-18F6-6AB7-0892-D538C0E179BA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {29872D0F-18F6-6AB7-0892-D538C0E179BA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {29872D0F-18F6-6AB7-0892-D538C0E179BA}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Data/Input.pdf b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Data/Input.pdf new file mode 100644 index 00000000..839bbb72 Binary files /dev/null and b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Data/Input.pdf differ diff --git 
a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Extract-data-as-MD-from-PDF.csproj b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Extract-data-as-MD-from-PDF.csproj new file mode 100644 index 00000000..3a94b69f --- /dev/null +++ b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Extract-data-as-MD-from-PDF.csproj @@ -0,0 +1,24 @@ + + + + Exe + net8.0 + Extract_data_as_MD_from_PDF + enable + enable + + + + + + + + + Always + + + Always + + + + diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Output/.gitkeep b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Output/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Program.cs b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Program.cs new file mode 100644 index 00000000..d7dc0393 --- /dev/null +++ b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/Program.cs @@ -0,0 +1,13 @@ +using Syncfusion.SmartDataExtractor; +using System.Text; + +//Open the input PDF read-only; Path.Combine keeps the relative path valid on Windows, Linux and macOS. +using (FileStream stream = new FileStream(Path.GetFullPath(Path.Combine("Data", "Input.pdf")), FileMode.Open, FileAccess.Read)) +{ + //Initialize the Smart Data Extractor. + DataExtractor extractor = new DataExtractor(); + //Extract the document data as a Markdown string. + string data = extractor.ExtractDataAsMarkdown(stream); + //Save the extracted Markdown as UTF-8 into the Output folder (platform-neutral path). 
+ File.WriteAllText(Path.GetFullPath(Path.Combine("Output", "Output.md")), data, Encoding.UTF8); +} diff --git a/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/README.md b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/README.md new file mode 100644 index 00000000..9d8b65aa --- /dev/null +++ b/Data-Extraction/Smart-Data-Extractor/Extract-data-as-MD-from-PDF/.NET/Extract-data-as-MD-from-PDF/README.md @@ -0,0 +1,35 @@ +# Extract Structured Data from PDF as Markdown + +The Syncfusion® [Smart Data Extractor](https://www.syncfusion.com/document-sdk/net-pdf-data-extraction) is a .NET library that extracts document structures such as hierarchies, text blocks, images, headers, and footers from PDFs and scanned images by analyzing visual layout patterns like lines, boxes, and alignment. + +## Steps to Extract Data as Markdown from PDF Files + +Step 1: **Create a new project:** Begin by setting up a new C# Console Application project. + +Step 2: **Install the NuGet package:** Add the [Syncfusion.SmartDataExtractor.Net.Core](https://www.nuget.org/packages/Syncfusion.SmartDataExtractor.Net.Core) package to your project from [NuGet.org](https://www.nuget.org/). + +Step 3: **Include necessary namespaces:** Add these namespaces in your Program.cs file: + +```csharp +using System.IO; +using System.Text; +using Syncfusion.SmartDataExtractor; + +``` + +Step 4: Add the following code snippet in Program.cs file to extract data from PDF. + +```csharp +//Open the input PDF file as a stream. +using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) +{ + //Initialize the Smart Data Extractor. + DataExtractor extractor = new DataExtractor(); + //Extract data as Markdown. + string data = extractor.ExtractDataAsMarkdown(stream); + //Save the extracted Markdown data into an output file. 
+ File.WriteAllText("Output.md", data, Encoding.UTF8); +} + +``` +More information about extracting data from PDF can be found in this [documentation](https://help.syncfusion.com/document-processing/data-extraction/smart-data-extractor/overview) section. \ No newline at end of file diff --git a/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-JSON/.NET/Recognize-forms-using-JSON/README.md b/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-JSON/.NET/Recognize-forms-using-JSON/README.md new file mode 100644 index 00000000..6b2ccc3d --- /dev/null +++ b/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-JSON/.NET/Recognize-forms-using-JSON/README.md @@ -0,0 +1,34 @@ +# Recognize Form Data from PDF using C# + +The Syncfusion® [Smart Form Recognizer](https://www.syncfusion.com/document-sdk/net-pdf-data-extraction) is a .NET C# library that detects form regions and extracts text fields, checkboxes, radio buttons, and signatures by interpreting visual patterns such as boxes and selection markers. The extracted results are returned as normalized JSON with confidence scores, enabling applications to automatically process form data. + +## Steps to Recognize Form Data from PDF Files + +Step 1: **Create a new project:** Begin by setting up a new C# Console Application project. + +Step 2: **Install the NuGet package:** Add the [Syncfusion.SmartFormRecognizer.Net.Core](https://www.nuget.org/packages/Syncfusion.SmartFormRecognizer.Net.Core) package to your project from [NuGet.org](https://www.nuget.org/). + +Step 3: **Include necessary namespaces:** Add these namespaces in your Program.cs file: + +```csharp +using System.IO; +using Syncfusion.SmartFormRecognizer; +``` + +Step 4: Add the following code snippet in Program.cs file to extract data from PDF. + +```csharp +// Read the input PDF file as stream. 
+using (FileStream inputStream = new FileStream(Path.GetFullPath(@"Input.pdf"), FileMode.Open, FileAccess.Read)) +{ + // Initialize the Form Recognizer. + FormRecognizer smartFormRecognizer = new FormRecognizer(); + // Recognize the form and get the output as a JSON string. + string outputJson = smartFormRecognizer.RecognizeFormAsJson(inputStream); + // Save the output JSON to a file. + File.WriteAllText(Path.GetFullPath(@"Output.json"), outputJson); +} +``` +For a complete working example, download it from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-JSON/.NET). + +More information about SmartFormRecognizer can be found in this [documentation](https://help.syncfusion.com/document-processing/data-extraction/smart-form-recognizer/overview) section. \ No newline at end of file diff --git a/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-Pdf/.NET/Recognize-forms-using-Pdf/Data/Input.pdf b/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-Pdf/.NET/Recognize-forms-using-Pdf/Data/Input.pdf index 839bbb72..80f58e30 100644 Binary files a/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-Pdf/.NET/Recognize-forms-using-Pdf/Data/Input.pdf and b/Data-Extraction/Smart-Form-Recognizer/Recognize-forms-using-Pdf/.NET/Recognize-forms-using-Pdf/Data/Input.pdf differ diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF.sln b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF.sln new file mode 100644 index 00000000..a15e4cd4 --- /dev/null +++ b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 18 +VisualStudioVersion = 18.5.11716.220 stable +MinimumVisualStudioVersion = 
10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Extract-table-data-as-MD-from-PDF", "Extract-table-data-as-MD-from-PDF\Extract-table-data-as-MD-from-PDF.csproj", "{966488A5-6BD0-4E04-B21F-F94BBB51CF97}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {966488A5-6BD0-4E04-B21F-F94BBB51CF97}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {966488A5-6BD0-4E04-B21F-F94BBB51CF97}.Debug|Any CPU.Build.0 = Debug|Any CPU + {966488A5-6BD0-4E04-B21F-F94BBB51CF97}.Release|Any CPU.ActiveCfg = Release|Any CPU + {966488A5-6BD0-4E04-B21F-F94BBB51CF97}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {AB115D6D-2324-40E5-844D-F6B9A308B5FE} + EndGlobalSection +EndGlobal diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Data/Input.pdf b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Data/Input.pdf new file mode 100644 index 00000000..839bbb72 Binary files /dev/null and b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Data/Input.pdf differ diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Extract-table-data-as-MD-from-PDF.csproj b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Extract-table-data-as-MD-from-PDF.csproj new file mode 100644 index 00000000..6a849e35 --- /dev/null +++ 
b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Extract-table-data-as-MD-from-PDF.csproj @@ -0,0 +1,24 @@ + + + + Exe + net8.0 + Extract_table_data_as_MD_from_PDF + enable + enable + + + + + + + + + Always + + + Always + + + + diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Output/.gitkeep b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Output/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Program.cs b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Program.cs new file mode 100644 index 00000000..763ede86 --- /dev/null +++ b/Data-Extraction/Smart-Table-Extractor/Extract-table-data-as-MD-from-PDF/.NET/Extract-table-data-as-MD-from-PDF/Program.cs @@ -0,0 +1,14 @@ +using System.IO; +using System.Text; +using Syncfusion.SmartTableExtractor; + +//Open the input PDF read-only; Path.Combine keeps the relative path valid on Windows, Linux and macOS. +using (FileStream stream = new FileStream(Path.GetFullPath(Path.Combine("Data", "Input.pdf")), FileMode.Open, FileAccess.Read)) +{ + //Initialize the Smart Table Extractor. + TableExtractor extractor = new TableExtractor(); + //Extract table data from the PDF document as a Markdown string. + string data = extractor.ExtractTableAsMarkdown(stream); + //Save the extracted Markdown as UTF-8 into the Output folder (platform-neutral path). 
+ File.WriteAllText(Path.GetFullPath(Path.Combine("Output", "Output.md")), data, Encoding.UTF8); +} \ No newline at end of file diff --git a/Data-Extraction/Smart-Table-Extractor/Extract-tables-from-pdf-document/.NET/Extract-tables-from-pdf-document/README.md b/Data-Extraction/Smart-Table-Extractor/Extract-tables-from-pdf-document/.NET/Extract-tables-from-pdf-document/README.md new file mode 100644 index 00000000..c39053b7 --- /dev/null +++ b/Data-Extraction/Smart-Table-Extractor/Extract-tables-from-pdf-document/.NET/Extract-tables-from-pdf-document/README.md @@ -0,0 +1,34 @@ +# Extract Structured Table Data from PDF using C# + +The Syncfusion® [Smart Table Extractor](https://www.syncfusion.com/document-sdk/net-pdf-data-extraction) is a .NET library used to extract table data from PDFs and images. + +## Steps to Extract Structured Table Data from PDF Files + +Step 1: **Create a new project:** Begin by setting up a new C# Console Application project. + +Step 2: **Install the NuGet package:** Add the [Syncfusion.SmartTableExtractor.Net.Core](https://www.nuget.org/packages/Syncfusion.SmartTableExtractor.Net.Core) package to your project from [NuGet.org](https://www.nuget.org/). + +Step 3: **Include necessary namespaces:** Add these namespaces in your Program.cs file: +```csharp +using System.IO; +using System.Text; +using Syncfusion.SmartTableExtractor; +``` + +Step 4: Add the following code snippet in Program.cs file to extract data from PDF. + +```csharp +// Open the input PDF file as a read-only stream (the file is never modified). +using (FileStream stream = new FileStream(Path.GetFullPath(@"Input.pdf"), FileMode.Open, FileAccess.Read)) +{ + // Initialize the Smart Table Extractor. + TableExtractor extractor = new TableExtractor(); + // Extract table data from the PDF document as JSON string. + string data = extractor.ExtractTableAsJson(stream); + // Save the extracted JSON data into an output file. 
+ File.WriteAllText(Path.GetFullPath(@"Output.json"), data, Encoding.UTF8); +} +``` +For a complete working example, download it from [GitHub](https://github.com/SyncfusionExamples/PDF-Examples/tree/master/Data-Extraction/Smart-Table-Extractor/Extract-tables-from-pdf-document/.NET). + +More information about extracting table data from PDF can be found in this [documentation](https://help.syncfusion.com/document-processing/data-extraction/smart-table-extractor/overview) section. \ No newline at end of file