Learning Oracle Application and Software Testing

Monday, August 22, 2022

Extract text from a single image using Python

# Extract text from a single image using Python (OCR via Tesseract)
from PIL import Image
from pytesseract import pytesseract

# Path to the Tesseract executable on this (Windows) machine; adjust it to
# match your own installation.
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Path to the image to run OCR on
path_to_image = 'chemical.PNG'

# Point pytesseract's tesseract_cmd at the Tesseract executable
pytesseract.tesseract_cmd = path_to_tesseract

# Open the image with PIL. The context manager closes the underlying file
# handle as soon as OCR has finished instead of leaving it open.
with Image.open(path_to_image) as img:
    # Extract text from the image
    text = pytesseract.image_to_string(img)

print(text)

👋 Hi, I'm Suriya — QA Engineer with 4+ years of experience in manual, API & automation testing.

📬 Contact Me | LinkedIn | GitHub

📌 Follow for: Real-Time Test Cases, Bug Reports, Selenium Frameworks.

Extract only images from PDF using Python

# How to Extract Images from PDF in Python
import fitz  # PyMuPDF
import io
from PIL import Image

# file path you want to extract images from
file = "byju.pdf"

# open the file; the context manager closes the document when we are done
with fitz.open(file) as pdf_file:

    # iterate over PDF pages (page_index is 0-based)
    for page_index, page in enumerate(pdf_file):

        # list the images referenced by this page (queried once, reused below)
        image_list = page.get_images()

        # printing number of images found in this page
        if image_list:
            print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
        else:
            print("[!] No images found on page", page_index)

        for image_index, img in enumerate(image_list, start=1):

            # get the XREF of the image (first item of each image entry)
            xref = img[0]

            # extract the image bytes
            base_image = pdf_file.extract_image(xref)
            image_bytes = base_image["image"]

            # get the image extension
            image_ext = base_image["ext"]

            # output name uses a 1-based page number and 1-based image number
            output_name = f"image{page_index+1}_{image_index}.{image_ext}"

            # load it to PIL and save it to local disk; both handles are
            # closed by their context managers so nothing leaks per image
            with Image.open(io.BytesIO(image_bytes)) as image:
                with open(output_name, "wb") as output_file:
                    image.save(output_file)

👋 Hi, I'm Suriya — QA Engineer with 4+ years of experience in manual, API & automation testing.

📬 Contact Me | LinkedIn | GitHub

📌 Follow for: Real-Time Test Cases, Bug Reports, Selenium Frameworks.

Extract text and images from PDF files in Python

# Module Imports
import os
from PIL import Image
import pytesseract
from pdf2image import convert_from_path

# Define Paths (Windows install locations; adjust to match your machine)
poppler_path = r'C:\Program Files\poppler-0.68.0\poppler-0.68.0\bin'
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
pdf_path = "chemistry.pdf"

# Save PDF pages to images, remembering each file we write so that OCR below
# runs only on these pages, in page order. Scanning the working directory
# for "*.png" would also pick up unrelated images and in arbitrary order.
images = convert_from_path(pdf_path=pdf_path, poppler_path=poppler_path)
png_files = []
for count, img in enumerate(images):
    img_name = f"page_{count}.png"
    img.save(img_name, "png")
    png_files.append(img_name)

# Extract Text
for png_file in png_files:
    # The context manager closes each page image once its text is extracted.
    with Image.open(png_file) as page_image:
        extracted_text = pytesseract.image_to_string(page_image)
    print(extracted_text)

Reference - https://www.gcptutorials.com/post/python-extract-text-from-pdf-files

👋 Hi, I'm Suriya — QA Engineer with 4+ years of experience in manual, API & automation testing.

📬 Contact Me | LinkedIn | GitHub

📌 Follow for: Real-Time Test Cases, Bug Reports, Selenium Frameworks.

Tuesday, August 2, 2022

Read an Excel file in Selenium WebDriver using JXL

package coaching;

import java.io.FileInputStream;

import java.io.IOException;

import java.time.Duration;

import org.openqa.selenium.By;

import org.openqa.selenium.WebDriver;

import org.openqa.selenium.WebElement;

import org.openqa.selenium.chrome.ChromeDriver;

import org.testng.annotations.DataProvider;

import org.testng.annotations.Test;

import jxl.Sheet;

import jxl.Workbook;

import jxl.read.biff.BiffException;

public class JxlData {

WebDriver driver;

String [][] data = null;

@DataProvider(name="loginData")

public String [][] loginDataProvider() throws BiffException, IOException{

data=getExcelData();

// string change to object

// jxl jar used only xls format(97-2003 worksheet)

return data;

}

public String[][] getExcelData() throws BiffException, IOException{

FileInputStream excel = new FileInputStream("F:\\Suriya\\suri.xls");

Workbook workbook = Workbook.getWorkbook(excel);

Sheet sheet = workbook.getSheet(0); // sheet name

int rowCount = sheet.getRows();

int columnCount = sheet.getColumns();

String testData[][] = new String[rowCount-1][columnCount];

for (int i=1; i<rowCount;i++) {

for (int j=0;j<columnCount;j++) {

testData[i-1][j]=sheet.getCell(j, i).getContents();

}

return testData;

}

@Test(dataProvider="loginData")

public void login(String uName, String pword) {

System.setProperty("webdriver.chrome.driver", "F:\\Suriya\\chromedriver.exe");

driver = new ChromeDriver();

driver.manage().window().maximize();

driver.manage().deleteAllCookies();

driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(30));

driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(30));

driver.get("URL");

WebElement username = driver.findElement(By.id("userName"));

username.sendKeys(uName);

WebElement password = driver.findElement(By.id("pwd"));

password.sendKeys(pword);

WebElement login = driver.findElement(By.cssSelector(".btn:nth-child(3)"));

}

👋 Hi, I'm Suriya — QA Engineer with 4+ years of experience in manual, API & automation testing.

📬 Contact Me | LinkedIn | GitHub

📌 Follow for: Real-Time Test Cases, Bug Reports, Selenium Frameworks.

Selenium using Data Provider Method

package coaching;

import java.time.Duration;

import org.openqa.selenium.By;

import org.openqa.selenium.WebDriver;

import org.openqa.selenium.WebElement;

import org.openqa.selenium.chrome.ChromeDriver;

import org.testng.annotations.DataProvider;

import org.testng.annotations.Test;

public class DataProviderTest {

WebDriver driver;

@DataProvider(name = "Authentication")

public static Object[][] credentials() {

// The number of times data is repeated, test will be executed the same no. of times

// Here it will execute two times

return new Object[][] { { "suriya", "Test@123" }, { "parithy", "Test@123" }};

}

@Test(dataProvider = "Authentication")

public void test(String username, String password) {

System.setProperty("webdriver.chrome.driver", "F:\\Suriya\\chromedriver.exe");

driver = new ChromeDriver();

driver.manage().window().maximize();

driver.manage().deleteAllCookies();

driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(30));

driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(30));

driver.get("URL");

WebElement username1 = driver.findElement(By.id("userName"));

username1.sendKeys(username);

WebElement password1 = driver.findElement(By.id("pwd"));

password1.sendKeys(password);

WebElement login = driver.findElement(By.cssSelector(".btn:nth-child(3)"));

}

👋 Hi, I'm Suriya — QA Engineer with 4+ years of experience in manual, API & automation testing.

📬 Contact Me | LinkedIn | GitHub

📌 Follow for: Real-Time Test Cases, Bug Reports, Selenium Frameworks.

Pages

Monday, August 22, 2022

Extract text from a single image using Python

Extract only images from PDF using Python

Extract text and images from PDF files in Python

Tuesday, August 2, 2022

Read an Excel file in Selenium WebDriver using JXL

Selenium using Data Provider Method

Popular Posts

Blog Archive