Monday, August 22, 2022

Image File stored as a BLOB in MySQL Table using Python

 # Import the required modules
import mysql.connector
import base64
from PIL import Image
import io

# Create a connection
mydb = mysql.connector.connect(
host="localhost",
user="suriyaparithy",
password="suriyaparithy",
database="database" # Name of the database
)

# Create a cursor object
cursor = mydb.cursor()

# Open a file in binary mode
file = open('chemical.PNG','rb').read()

# We must encode the file to get base64 string
file = base64.b64encode(file)

# Sample data to be inserted
args = ('100', 'Sample Name', file)

# Prepare a query
query = 'INSERT INTO PROFILE VALUES(%s, %s, %s)'

# Execute the query and commit the database.
cursor.execute(query,args)
mydb.commit()

Extract text from a single image using Python

#Extract text from a single image using Python
from PIL import Image
from pytesseract import pytesseract

#Define path to tessaract.exe
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

#Define path to image
path_to_image = 'chemical.PNG'

#Point tessaract_cmd to tessaract.exe
pytesseract.tesseract_cmd = path_to_tesseract

#Open image with PIL
img = Image.open(path_to_image)

#Extract text from image
text = pytesseract.image_to_string(img)
print(text)

Extract only images from PDF using Python

 # How to Extract Images from PDF in Python
import fitz # PyMuPDF
import io
from PIL import Image

# file path you want to extract images from
file = "byju.pdf"

# open the file
pdf_file = fitz.open(file)

# iterate over PDF pages
for page_index in range(len(pdf_file)):

    # get the page itself
    page = pdf_file[page_index]
    image_list = page.get_images()

    # printing number of images found in this page
    if image_list:
        print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
    else:
        print("[!] No images found on page", page_index)
    for image_index, img in enumerate(page.get_images(), start=1):

        # get the XREF of the image
        xref = img[0]

        # extract the image bytes
        base_image = pdf_file.extract_image(xref)
        image_bytes = base_image["image"]

        # get the image extension
        image_ext = base_image["ext"]

        # load it to PIL
        image = Image.open(io.BytesIO(image_bytes))

        # save it to local disk
        image.save(open(f"image{page_index+1}_{image_index}.{image_ext}", "wb"))

Extract text and image from PDF files in python

# Module Imports
import os
from PIL import Image
import pytesseract
from pdf2image import convert_from_path

# Define Paths
poppler_path = r'C:\Program Files\poppler-0.68.0\poppler-0.68.0\bin'
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
pdf_path = "chemistry.pdf"

# Save PDF pages to images
images = convert_from_path(pdf_path=pdf_path, poppler_path=poppler_path)
for count, img in enumerate(images):
    img_name = f"page_{count}.png"  
    img.save(img_name, "png")

# Extract Text
png_files = [f for f in os.listdir(".") if f.endswith(".png")]
for png_file in png_files:
    extracted_text = pytesseract.image_to_string(Image.open(png_file))
    print(extracted_text)

Reference - https://www.gcptutorials.com/post/python-extract-text-from-pdf-files 

Tuesday, August 2, 2022

Read excel file in selenium webdriver using jxl

 package coaching;

import java.io.FileInputStream;

import java.io.IOException;

import java.time.Duration;

import org.openqa.selenium.By;

import org.openqa.selenium.WebDriver;

import org.openqa.selenium.WebElement;

import org.openqa.selenium.chrome.ChromeDriver;

import org.testng.annotations.DataProvider;

import org.testng.annotations.Test;

import jxl.Sheet;

import jxl.Workbook;

import jxl.read.biff.BiffException;


public class JxlData {

WebDriver driver;

String [][] data = null;

@DataProvider(name="loginData")

public String [][] loginDataProvider() throws BiffException, IOException{

data=getExcelData();

// string change to object

// jxl jar used only xls format(97-2003 worksheet)

return data;

}

  public String[][] getExcelData() throws BiffException, IOException{

  

  FileInputStream excel = new FileInputStream("F:\\Suriya\\suri.xls");

   Workbook workbook = Workbook.getWorkbook(excel);

  Sheet sheet = workbook.getSheet(0); // sheet name

  int rowCount = sheet.getRows();

  int columnCount = sheet.getColumns();

  String testData[][] = new String[rowCount-1][columnCount];

  

  for (int i=1; i<rowCount;i++) {

  for (int j=0;j<columnCount;j++) {

  testData[i-1][j]=sheet.getCell(j, i).getContents();

  }

  }

   return testData;

  }

  @Test(dataProvider="loginData")

  public void login(String uName, String pword) {

  System.setProperty("webdriver.chrome.driver", "F:\\Suriya\\chromedriver.exe");

driver = new ChromeDriver();

driver.manage().window().maximize();

driver.manage().deleteAllCookies();

driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(30));

driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(30));

driver.get("URL");

WebElement username = driver.findElement(By.id("userName"));

username.sendKeys(uName);


WebElement password = driver.findElement(By.id("pwd"));

password.sendKeys(pword);


WebElement login = driver.findElement(By.cssSelector(".btn:nth-child(3)"));

login.click();

    }

}