Advertisement

Python Project With Source Code - Website Summarization API

Python Project With Source Code - Website Summarization API

Website Summarization API

This project builds a machine learning model that summarises a website given its URL.

Getting Started

These instructions will get you a copy of the project up and running on your

local machine for development and testing purposes.

Prerequisites

Python distribution

```

Anaconda

```

Installing

Install Anaconda python distribution on your system

Create a virtual environment called app.

```

python -m venv app

```

Activate the virtual environment

```

LINUX/Mac: source app/bin/activate

Windows: app\Scripts\activate

```

Upgrade to the latest pip

```

pip install --upgrade pip

```

Install dependencies using requirements file

```

pip install -r requirements.txt

```

**Note: Your virtual environment must always be activated before running

any command**

Deployment

Start the app (make sure to enter a valid URL that points to an existing website)

Example of valid commands

```

python app.py simple --url https://facebook.com --sentence 1 --language english

python app.py simple --url https://facebook.com

python app.py simple --url https://korapay.com

python app.py bulk --path ./csv/valid_websites.csv

```

APIs

These are the command options in full:

```

A command line utility for website Summarization.

-----------------------------------------------

These are common commands for this app.

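positional arguments:

action The action to run: 'simple' (one URL) or 'bulk' (a CSV file of websites)

optional arguments:

-h, --help show this help message and exit

--url URL A link to the website url

--sentence SENTENCE Argument to define number of sentence for the summary

--language LANGUAGE Argument to define language of the summary

--path PATH path to csv file

```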

Requirements:

utils==1.0.1

sumeval==0.2.2

tensorflow==2.3.0

wget==3.2

sumy==0.8.1

model==0.6.0

numpy==1.19.1

newspaper==0.1.0.7

nltk==3.5

gensim==3.8.3

Source Code:

#!/usr/bin/python

from utils.summarize import summarize

import csv

import shutil

import os

import textwrap

import logging

import argparse

import sys

def parse_args(argv):
    """Parse the command-line arguments of the summarization tool.

    Args:
        argv: Full argument vector including the program name at index 0
            (for example ``sys.argv``); the first element is skipped.

    Returns:
        argparse.Namespace with the attributes ``action``, ``url``,
        ``sentence`` (int, default 2), ``language`` (default ``'English'``)
        and ``path``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            A command line utility for website summarization.
            -----------------------------------------------
            These are common commands for this app.'''))

    # The action is deliberately left unconstrained (no ``choices=``) so that
    # the caller can keep printing its own "not supported" message.
    parser.add_argument(
        'action',
        help="Action to run: 'simple' (summarize a single --url) or "
             "'bulk' (summarize every website listed in the --path csv file)")
    parser.add_argument(
        '--url',
        help='A link to the website url')
    parser.add_argument(
        '--sentence',
        help='Argument to define number of sentence for the summary',
        type=int,
        default=2)
    parser.add_argument(
        '--language',
        help='Argument to define language of the summary',
        default='English')
    parser.add_argument(
        '--path',
        help='path to csv file')

    return parser.parse_args(argv[1:])

def readCsv(path):
    """Read a CSV file and return its rows.

    The file is first read with the platform's default text encoding. If the
    content cannot be decoded, it is re-read from the start with a
    single-byte fallback encoding, so rows from the failed first attempt are
    never duplicated.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one list of column strings per CSV row, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    print('\n\n Processing Csv file \n\n')
    sys.stdout.flush()

    try:
        # newline='' is what the csv module requires so that it can handle
        # line endings (including newlines inside quoted fields) itself.
        with open(path, 'r', newline='') as userFile:
            return list(csv.reader(userFile))
    except UnicodeDecodeError:
        # 'mbcs' is only available on Windows; elsewhere fall back to
        # 'latin-1', which can decode any byte sequence.
        fallback = 'mbcs' if sys.platform == 'win32' else 'latin-1'
        with open(path, 'r', encoding=fallback, newline='') as userFile:
            return list(csv.reader(userFile))

def writeCsv(data, LANGUAGE, SENTENCES_COUNT):
    """Write ``beneficiary.csv`` with a ``summary`` column added to every row.

    The first row of ``data`` is treated as the header and must contain a
    ``website`` column. Each remaining row is summarized by passing its
    ``website`` value to ``summarize``; rows that cannot be summarized are
    reported and left out of the output.

    Args:
        data: List of rows (lists of strings); ``data[0]`` is the header.
        LANGUAGE: Language forwarded to ``summarize``.
        SENTENCES_COUNT: Sentence count forwarded to ``summarize``.

    Raises:
        ValueError: If ``data`` is empty or the header has no ``website``
            column. This is checked before the output file is opened, so a
            bad input never leaves an empty ``beneficiary.csv`` behind.
    """
    print('\n\n Updating Csv file \n\n')
    sys.stdout.flush()

    if not data:
        raise ValueError('CSV data is empty; a header row is required')
    header, *rows = data
    position = header.index('website')  # ValueError when the column is absent

    # newline='' stops the csv module from emitting blank lines on Windows.
    with open('beneficiary.csv', 'w', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        # Always emit the header, even when there are no data rows, and
        # build new lists so the caller's rows are not mutated.
        newFileWriter.writerow(list(header) + ['summary'])
        for row in rows:
            try:
                summary = summarize(row[position], LANGUAGE, SENTENCES_COUNT)
            except Exception:
                # Best effort: one bad row must not abort the whole file.
                logging.exception('Could not summarize row %r', row)
                print('\n\n Error Skipping line \n\n')
                sys.stdout.flush()
                continue
            newFileWriter.writerow(list(row) + [summary])

def processCsv(path, LANGUAGE, SENTENCES_COUNT):
    """Read the CSV at ``path`` and write the summarized copy.

    Any ordinary error raised while reading or writing is logged with its
    traceback and reported to the user; it is not re-raised. Interpreter-level
    signals such as ``KeyboardInterrupt`` are no longer swallowed.

    Args:
        path: Path of the input CSV file.
        LANGUAGE: Language forwarded to ``writeCsv``.
        SENTENCES_COUNT: Sentence count forwarded to ``writeCsv``.
    """
    print('\n\n Processing Started \n\n')
    sys.stdout.flush()

    try:
        data = readCsv(path)
        writeCsv(data, LANGUAGE, SENTENCES_COUNT)
    except Exception:
        # Keep the real cause in the log; the console message stays generic.
        logging.exception('Failed to process CSV file %r', path)
        print('\n\n Invalid file in file path \n\n')
        sys.stdout.flush()

def main(argv=sys.argv):
    """Run the command-line tool.

    Supported actions:

    * ``bulk``: process the CSV file given by ``--path`` and, when a
      non-empty ``beneficiary.csv`` was produced, move it over ``path``.
    * ``simple``: call ``summarize`` on the single ``--url``.

    Any other action prints a hint on how to get help.

    Args:
        argv: Full argument vector including the program name.

    Returns:
        The value returned by ``shutil.move`` when a bulk run replaced the
        input file, otherwise ``None``.
    """
    # Configure logging
    logging.basicConfig(filename='applog.log',
                        filemode='w',
                        level=logging.INFO,
                        format='%(levelname)s:%(message)s')

    args = parse_args(argv)
    action = args.action
    url = args.url
    path = args.path
    LANGUAGE = "english" if args.language is None else args.language
    SENTENCES_COUNT = 2 if args.sentence is None else args.sentence

    if action == 'bulk':
        if path is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
            return

        results_file = 'beneficiary.csv'
        # Remove leftovers from an earlier run so that a failed run can never
        # move a stale results file over the user's input. Skip this when the
        # input itself is the results file.
        same_file = os.path.abspath(path) == os.path.abspath(results_file)
        if not same_file and os.path.isfile(results_file):
            os.remove(results_file)

        # guard against errors
        try:
            processCsv(path, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Bulk processing of %r failed', path)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()

        print('Completed')
        sys.stdout.flush()

        # A valid result always has at least a header row, so an empty file
        # means processing failed and the input must be left untouched.
        if os.path.isfile(results_file) and os.path.getsize(results_file) > 0:
            return shutil.move(results_file, path)
        return

    if action == 'simple':
        if url is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()
            return

        # guard against errors
        try:
            # NOTE(review): the return value is discarded here; presumably
            # summarize reports its own result - confirm against utils.
            summarize(url, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Could not summarize %r', url)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()

        print('Completed')
        sys.stdout.flush()
        return

    print(
        '\nAction command is not supported\n for help: run python3 app.py -h')
    sys.stdout.flush()
    return

# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Post a Comment

0 Comments